diff --git a/sycl/include/sycl/detail/generic_type_traits.hpp b/sycl/include/sycl/detail/generic_type_traits.hpp index 3b0ce7988f576..cf00f3d01f381 100644 --- a/sycl/include/sycl/detail/generic_type_traits.hpp +++ b/sycl/include/sycl/detail/generic_type_traits.hpp @@ -252,6 +252,16 @@ inline constexpr bool is_genfloatptr_marray_v = (IsDecorated == access::decorated::yes || IsDecorated == access::decorated::no); +template +using is_byte_t = typename +#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) + std::is_same; +#else + std::false_type; +#endif + +template inline constexpr bool is_byte_v = is_byte_t::value; + template using make_floating_point_t = make_type_t; @@ -332,6 +342,8 @@ template auto convertToOpenCLType(T &&x) { std::declval()))>, no_ref::size()>; #ifdef __SYCL_DEVICE_ONLY__ + +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES // TODO: for some mysterious reasons on NonUniformGroups E2E tests fail if // we use the "else" version only. I suspect that's an issues with // non-uniform groups implementation. @@ -340,6 +352,10 @@ template auto convertToOpenCLType(T &&x) { else return static_cast( x.template as()); +#else // __INTEL_PREVIEW_BREAKING_CHANGES + return sycl::bit_cast(x); +#endif // __INTEL_PREVIEW_BREAKING_CHANGES + #else return x.template as(); #endif diff --git a/sycl/include/sycl/detail/vector_convert.hpp b/sycl/include/sycl/detail/vector_convert.hpp index c018fce5bcfa3..6552daa560e9a 100644 --- a/sycl/include/sycl/detail/vector_convert.hpp +++ b/sycl/include/sycl/detail/vector_convert.hpp @@ -558,6 +558,15 @@ NativeToT convertImpl(NativeFromT Value) { } } +#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) +template +auto ConvertImpl(std::byte val) { + return convertImpl( + (std::int8_t)val); +} +#endif + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/ext/oneapi/bfloat16.hpp b/sycl/include/sycl/ext/oneapi/bfloat16.hpp index 3a16dcd244b4c..6792262ec21a0 100644 --- a/sycl/include/sycl/ext/oneapi/bfloat16.hpp +++ b/sycl/include/sycl/ext/oneapi/bfloat16.hpp @@ -32,6 +32,7 @@ bfloat16 bitsToBfloat16(const Bfloat16StorageT Value); // sycl::vec support namespace bf16 { +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES #ifdef __SYCL_DEVICE_ONLY__ using Vec2StorageT = Bfloat16StorageT __attribute__((ext_vector_type(2))); using Vec3StorageT = Bfloat16StorageT __attribute__((ext_vector_type(3))); @@ -45,6 +46,7 @@ using Vec4StorageT = std::array; using Vec8StorageT = std::array; using Vec16StorageT = std::array; #endif +#endif // __INTEL_PREVIEW_BREAKING_CHANGES } // namespace bf16 } // namespace detail diff --git a/sycl/include/sycl/half_type.hpp b/sycl/include/sycl/half_type.hpp index 951146f2cdfbb..799ff9fb186e9 100644 --- a/sycl/include/sycl/half_type.hpp +++ b/sycl/include/sycl/half_type.hpp @@ -249,11 +249,14 @@ using StorageT = _Float16; using BIsRepresentationT = _Float16; using VecElemT = _Float16; +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES using Vec2StorageT = VecElemT __attribute__((ext_vector_type(2))); using Vec3StorageT = VecElemT __attribute__((ext_vector_type(3))); using Vec4StorageT = VecElemT __attribute__((ext_vector_type(4))); using Vec8StorageT = VecElemT __attribute__((ext_vector_type(8))); using Vec16StorageT = VecElemT __attribute__((ext_vector_type(16))); +#endif // __INTEL_PREVIEW_BREAKING_CHANGES + #else // SYCL_DEVICE_ONLY using StorageT = detail::host_half_impl::half; // No need to extract underlying data type for built-in functions operating on @@ -261,6 +264,7 @@ using StorageT = detail::host_half_impl::half; using BIsRepresentationT = half; using VecElemT = half; +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES // On the host side we cannot use OpenCL cl_half# types as an underlying type // for vec because they are actually defined as an integer type under the // hood. As a result half values will be converted to the integer and passed @@ -270,6 +274,8 @@ using Vec3StorageT = std::array; using Vec4StorageT = std::array; using Vec8StorageT = std::array; using Vec16StorageT = std::array; +#endif // __INTEL_PREVIEW_BREAKING_CHANGES + #endif // SYCL_DEVICE_ONLY #ifndef __SYCL_DEVICE_ONLY__ diff --git a/sycl/include/sycl/vector_preview.hpp b/sycl/include/sycl/vector_preview.hpp index f1bf7fcfcc24d..1d38fc08b287d 100644 --- a/sycl/include/sycl/vector_preview.hpp +++ b/sycl/include/sycl/vector_preview.hpp @@ -26,10 +26,6 @@ #error "SYCL device compiler is built without ext_vector_type support" #endif -#if defined(__SYCL_DEVICE_ONLY__) -#define __SYCL_USE_EXT_VECTOR_TYPE__ -#endif - #include // for decorated, address_space #include // for half, cl_char, cl_int #include // for ArrayCreator, RepeatV... @@ -45,8 +41,9 @@ #include // bfloat16 +#include // for std::min #include // for array -#include // for assert +#include // for assert #include // for size_t, NULL, byte #include // for uint8_t, int16_t, int... #include // for divides, multiplies @@ -86,81 +83,10 @@ struct elem { }; namespace detail { -// select_apply_cl_t selects from T8/T16/T32/T64 basing on -// sizeof(_IN). expected to handle scalar types in _IN. -template -using select_apply_cl_t = std::conditional_t< - sizeof(_IN) == 1, T8, - std::conditional_t>>; - -template struct vec_helper { - using RetType = T; - static constexpr RetType get(T value) { return value; } - static constexpr RetType set(T value) { return value; } -}; -template <> struct vec_helper { - using RetType = select_apply_cl_t; - static constexpr RetType get(bool value) { return value; } - static constexpr RetType set(bool value) { return value; } -}; - -template <> struct vec_helper { - using RetType = sycl::ext::oneapi::bfloat16; - using BFloat16StorageT = sycl::ext::oneapi::detail::Bfloat16StorageT; - static constexpr RetType get(BFloat16StorageT value) { -#if defined(__SYCL_BITCAST_IS_CONSTEXPR) - return sycl::bit_cast(value); -#else - // awkward workaround. sycl::bit_cast isn't constexpr in older GCC - // C++20 will give us both std::bit_cast and constexpr reinterpet for void* - // but neither available yet. - union { - sycl::ext::oneapi::bfloat16 bf16; - sycl::ext::oneapi::detail::Bfloat16StorageT storage; - } result = {}; - result.storage = value; - return result.bf16; -#endif - } - - static constexpr RetType get(RetType value) { return value; } - - static constexpr BFloat16StorageT set(RetType value) { -#if defined(__SYCL_BITCAST_IS_CONSTEXPR) - return sycl::bit_cast(value); -#else - union { - sycl::ext::oneapi::bfloat16 bf16; - sycl::ext::oneapi::detail::Bfloat16StorageT storage; - } result = {}; - result.bf16 = value; - return result.storage; -#endif - } -}; - -#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) -template <> struct vec_helper { - using RetType = std::uint8_t; - static constexpr RetType get(std::byte value) { return (RetType)value; } - static constexpr RetType set(std::byte value) { return (RetType)value; } - static constexpr std::byte get(std::uint8_t value) { - return (std::byte)value; - } - static constexpr std::byte set(std::uint8_t value) { - return (std::byte)value; - } -}; -#endif - template class OperationCurrentT, int... Indexes> class SwizzleOp; -template struct VecStorage; - // Element type for relational operator return value. template using rel_t = typename std::conditional_t< @@ -178,8 +104,18 @@ using rel_t = typename std::conditional_t< template class GetOp { public: using DataT = T; - DataT getValue(size_t) const { return (DataT)0; } - DataT operator()(DataT, DataT) { return (DataT)0; } + DataT getValue(size_t) const { + if constexpr (std::is_same_v) + return DataT{0.0f}; + else + return (DataT)0; + } + DataT operator()(DataT, DataT) { + if constexpr (std::is_same_v) + return DataT{0.0f}; + else + return (DataT)0; + } }; // Forward declarations @@ -188,212 +124,53 @@ class RoundedRangeKernel; template class RoundedRangeKernelWithKH; -// Vectors of size 1 are handled separately and therefore 1 is not included in -// the check below. -constexpr bool isValidVectorSize(int N) { - return N == 2 || N == 3 || N == 4 || N == 8 || N == 16; -} -template struct VecStorage { - static_assert( - isValidVectorSize(N) || N == 1, - "Incorrect number of elements for sycl::vec: only 1, 2, 3, 4, 8 " - "or 16 are supported"); - static_assert(!std::is_same_v, "Incorrect data type for sycl::vec"); -}; - -#ifdef __SYCL_DEVICE_ONLY__ -// device always has ext vector support, but for huge vectors -// we switch to std::array, so that we can use a smaller alignment (64) -// this is to support MSVC, which has a max of 64 for direct params. -template struct VecStorageImpl { - static constexpr size_t Num = (N == 3) ? 4 : N; - static constexpr size_t Sz = Num * sizeof(T); - using DataType = - typename std::conditional>::type; - using VectorDataType = T __attribute__((ext_vector_type(N))); -}; -#else // __SYCL_DEVICE_ONLY__ -template struct VecStorageImpl { - using DataType = std::array; -}; -#endif // __SYCL_DEVICE_ONLY__ - -// Single element bool -template <> struct VecStorage { - using DataType = bool; -#ifdef __SYCL_DEVICE_ONLY__ - using VectorDataType = bool; -#endif // __SYCL_DEVICE_ONLY__ -}; - -// Multiple element bool -template -struct VecStorage> { - using DataType = - typename VecStorageImpl, - N>::DataType; -#ifdef __SYCL_DEVICE_ONLY__ - using VectorDataType = - typename VecStorageImpl, - N>::VectorDataType; -#endif // __SYCL_DEVICE_ONLY__ -}; - -#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) -// Single element byte. Multiple elements will propagate through a later -// specialization. -template <> struct VecStorage { - using DataType = std::int8_t; -#ifdef __SYCL_DEVICE_ONLY__ - using VectorDataType = std::int8_t; -#endif // __SYCL_DEVICE_ONLY__ -}; -#endif // (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) - -// Single element signed integers -template -struct VecStorage>> { - using DataType = T; -#ifdef __SYCL_DEVICE_ONLY__ - using VectorDataType = DataType; -#endif // __SYCL_DEVICE_ONLY__ -}; - -// Single element unsigned integers -template -struct VecStorage>> { - using DataType = T; -#ifdef __SYCL_DEVICE_ONLY__ - using VectorDataType = DataType; -#endif // __SYCL_DEVICE_ONLY__ -}; - -// Single element floating-point (except half/bfloat16) +// OpenCL data type to convert to. template -struct VecStorage< - T, 1, - typename std::enable_if_t && is_sgenfloat_v>> { - using DataType = T; -#ifdef __SYCL_DEVICE_ONLY__ - using VectorDataType = DataType; -#endif // __SYCL_DEVICE_ONLY__ -}; -// Multiple elements signed/unsigned integers and floating-point (except -// half/bfloat16) -template -struct VecStorage< - T, N, - typename std::enable_if_t || - (is_sgenfloat_v && !is_half_or_bf16_v))>> { - using DataType = - typename VecStorageImpl::DataType, N>::DataType; -#ifdef __SYCL_DEVICE_ONLY__ - using VectorDataType = - typename VecStorageImpl::DataType, - N>::VectorDataType; -#endif // __SYCL_DEVICE_ONLY__ -}; - -// Single element half -template <> struct VecStorage { - using DataType = sycl::detail::half_impl::StorageT; -#ifdef __SYCL_DEVICE_ONLY__ - using VectorDataType = sycl::detail::half_impl::StorageT; -#endif // __SYCL_DEVICE_ONLY__ -}; - -// Multiple elements half -#if defined(__SYCL_DEVICE_ONLY__) -#define __SYCL_DEFINE_HALF_VECSTORAGE(Num) \ - template <> struct VecStorage { \ - using DataType = sycl::detail::half_impl::Vec##Num##StorageT; \ - using VectorDataType = sycl::detail::half_impl::Vec##Num##StorageT; \ - }; -#else // defined(__SYCL_DEVICE_ONLY__) -#define __SYCL_DEFINE_HALF_VECSTORAGE(Num) \ - template <> struct VecStorage { \ - using DataType = sycl::detail::half_impl::Vec##Num##StorageT; \ - }; -#endif // defined(__SYCL_DEVICE_ONLY__) - -__SYCL_DEFINE_HALF_VECSTORAGE(2) -__SYCL_DEFINE_HALF_VECSTORAGE(3) -__SYCL_DEFINE_HALF_VECSTORAGE(4) -__SYCL_DEFINE_HALF_VECSTORAGE(8) -__SYCL_DEFINE_HALF_VECSTORAGE(16) -#undef __SYCL_DEFINE_HALF_VECSTORAGE - -// Single element bfloat16 -template <> struct VecStorage { - using DataType = sycl::ext::oneapi::detail::Bfloat16StorageT; - // using VectorDataType = sycl::ext::oneapi::bfloat16; - using VectorDataType = sycl::ext::oneapi::detail::Bfloat16StorageT; -}; -// Multiple elements bfloat16 -#define __SYCL_DEFINE_BF16_VECSTORAGE(Num) \ - template <> struct VecStorage { \ - using DataType = sycl::ext::oneapi::detail::bf16::Vec##Num##StorageT; \ - using VectorDataType = \ - sycl::ext::oneapi::detail::bf16::Vec##Num##StorageT; \ - }; -__SYCL_DEFINE_BF16_VECSTORAGE(2) -__SYCL_DEFINE_BF16_VECSTORAGE(3) -__SYCL_DEFINE_BF16_VECSTORAGE(4) -__SYCL_DEFINE_BF16_VECSTORAGE(8) -__SYCL_DEFINE_BF16_VECSTORAGE(16) -#undef __SYCL_DEFINE_BF16_VECSTORAGE +// clang-format off +using element_type_for_vector_t = typename map_type< + T, +#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) + std::byte, /*->*/ std::uint8_t, +#endif + bool, /*->*/ std::int8_t, + sycl::half, /*->*/ sycl::detail::half_impl::StorageT, + sycl::ext::oneapi::bfloat16, /*->*/ sycl::ext::oneapi::detail::Bfloat16StorageT, + T, /*->*/ T>::type; +// clang-format on } // namespace detail -template using vec_data = detail::vec_helper; +///////////////////////// class sycl::vec ///////////////////////// +// Provides a cross-patform vector class template that works efficiently on +// SYCL devices as well as in host C++ code. +template class vec { -template -using vec_data_t = typename detail::vec_helper::RetType; + static_assert(NumElements == 1 || NumElements == 2 || NumElements == 3 || + NumElements == 4 || NumElements == 8 || NumElements == 16, + "Invalid number of elements for sycl::vec: only 1, 2, 3, 4, 8 " + "or 16 are supported"); + static_assert(sizeof(bool) == sizeof(int8_t), "bool size is not 1 byte"); -///////////////////////// class sycl::vec ///////////////////////// -/// Provides a cross-patform vector class template that works efficiently on -/// SYCL devices as well as in host C++ code. -/// -/// \ingroup sycl_api -template class vec { - using DataT = Type; + static constexpr size_t AdjustedNum = (NumElements == 3) ? 4 : NumElements; // This represent type of underlying value. There should be only one field // in the class, so vec should be equal to float16 in memory. - using DataType = typename detail::VecStorage::DataType; + using DataType = std::array; - static constexpr bool IsHostHalf = - std::is_same_v && - std::is_same_v; +public: +#ifdef __SYCL_DEVICE_ONLY__ + // Type used for passing sycl::vec to SPIRV builtins. + // We can not use ext_vector_type(1) as it's not supported by SPIRV + // plugins (CTS fails). + using vector_t = typename std::conditional_t< + NumElements == 1, detail::element_type_for_vector_t, + detail::element_type_for_vector_t __attribute__(( + ext_vector_type(NumElements)))>; +#endif // __SYCL_DEVICE_ONLY__ +private: static constexpr bool IsBfloat16 = std::is_same_v; - static constexpr size_t AdjustedNum = (NumElements == 3) ? 4 : NumElements; - static constexpr size_t Sz = sizeof(DataT) * AdjustedNum; - static constexpr bool IsSizeGreaterThanMaxAlign = - (Sz > detail::MaxVecAlignment); - - // TODO: There is no support for vector half type on host yet. - // Also, when Sz is greater than alignment, we use std::array instead of - // vector extension. This is for MSVC compatibility, which has a max alignment - // of 64 for direct params. If we drop MSVC, we can have alignment the same as - // size and use vector extensions for all sizes. - static constexpr bool IsUsingArrayOnDevice = - (IsHostHalf || IsBfloat16 || IsSizeGreaterThanMaxAlign); - -#if defined(__SYCL_DEVICE_ONLY__) - static constexpr bool NativeVec = NumElements > 1 && !IsUsingArrayOnDevice; - static constexpr bool IsUsingArrayOnHost = false; // not compiling for host. -#else - static constexpr bool NativeVec = false; - static constexpr bool IsUsingArrayOnHost = true; // host always std::array. -#endif - static constexpr int getNumElements() { return NumElements; } // SizeChecker is needed for vec(const argTN &... args) ctor to validate args. @@ -411,7 +188,7 @@ template class vec { template static constexpr std::array VecToArray(const vec &V, std::index_sequence) { - return {static_cast(V.getValue(Is))...}; + return {static_cast(V[Is])...}; } template class T4, int... T5, std::size_t... Is> @@ -446,7 +223,9 @@ template class vec { } template static constexpr auto FlattenVecArgHelper(const T &A) { - return std::array{vec_data::get(static_cast(A))}; + // static_cast required to avoid narrowing conversion warning + // when T = unsigned long int and DataT_ = int. + return std::array{static_cast(A)}; } template struct FlattenVecArg { constexpr auto operator()(const T &A) const { @@ -541,205 +320,83 @@ template class vec { using EnableIfSuitableNumElements = typename std::enable_if_t::value>; - template - constexpr vec(const std::array, NumElements> &Arr, - std::index_sequence) - : m_Data{([&](vec_data_t v) constexpr { - if constexpr (std::is_same_v) - return v.value; - else - return vec_data_t(static_cast(v)); - })(Arr[Is])...} {} - public: + // Aliases required by SPEC to make sycl::vec consistent + // with that of marray and buffer. using element_type = DataT; using value_type = DataT; using rel_t = detail::rel_t; -#ifdef __SYCL_DEVICE_ONLY__ - using vector_t = - typename detail::VecStorage::VectorDataType; -#endif // __SYCL_DEVICE_ONLY__ + /****************** Constructors **************/ vec() = default; - constexpr vec(const vec &Rhs) = default; constexpr vec(vec &&Rhs) = default; - constexpr vec &operator=(const vec &Rhs) = default; - - // W/o this, things like "vec = vec" doesn't work. - template - typename std::enable_if_t && - std::is_convertible_v, rel_t>, - vec &> - operator=(const vec &Rhs) { - *this = Rhs.template as(); - return *this; - } - -#ifdef __SYCL_USE_EXT_VECTOR_TYPE__ - template - using EnableIfNotHostHalf = typename std::enable_if_t; - - template - using EnableIfHostHalf = typename std::enable_if_t; - - template - using EnableIfUsingArrayOnDevice = - typename std::enable_if_t; - - template - using EnableIfNotUsingArrayOnDevice = - typename std::enable_if_t; -#endif // __SYCL_USE_EXT_VECTOR_TYPE__ - - template - using EnableIfUsingArray = - typename std::enable_if_t; - - template - using EnableIfNotUsingArray = - typename std::enable_if_t; - -#ifdef __SYCL_USE_EXT_VECTOR_TYPE__ - - template - explicit constexpr vec(const EnableIfNotUsingArrayOnDevice &arg) - : m_Data{DataType(vec_data::get(arg))} {} +private: + // Implementation detail for the next public ctor. + template + constexpr vec(const std::array &Arr, + std::index_sequence) + : m_Data{Arr[Is]...} {} - template - typename std::enable_if_t< - std::is_fundamental_v> || - detail::is_half_or_bf16_v>, - vec &> - operator=(const EnableIfNotUsingArrayOnDevice &Rhs) { - m_Data = (DataType)vec_data::get(Rhs); - return *this; - } +public: + explicit constexpr vec(const DataT &arg) + : vec{detail::RepeatValue(arg), + std::make_index_sequence()} {} - template - explicit constexpr vec(const EnableIfUsingArrayOnDevice &arg) - : vec{detail::RepeatValue( - static_cast>(arg)), + // Constructor from values of base type or vec of base type. Checks that + // base types are match and that the NumElements == sum of lengths of args. + template , + typename = EnableIfSuitableNumElements> + constexpr vec(const argTN &...args) + : vec{VecArgArrayCreator::Create(args...), std::make_index_sequence()} {} + /****************** Assignment Operators **************/ + constexpr vec &operator=(const vec &Rhs) = default; + + // Template required to prevent ambiguous overload with the copy assignment + // when NumElements == 1. The template prevents implicit conversion from + // vec<_, 1> to DataT. template typename std::enable_if_t< - std::is_fundamental_v> || + std::is_fundamental_v || detail::is_half_or_bf16_v>, vec &> - operator=(const EnableIfUsingArrayOnDevice &Rhs) { - for (int i = 0; i < NumElements; ++i) { - setValue(i, Rhs); - } + operator=(const DataT &Rhs) { + *this = vec{Rhs}; return *this; } -#else // __SYCL_USE_EXT_VECTOR_TYPE__ - explicit constexpr vec(const DataT &arg) - : vec{detail::RepeatValue( - static_cast>(arg)), - std::make_index_sequence()} {} + // W/o this, things like "vec = vec" doesn't work. template typename std::enable_if_t< - std::is_fundamental_v> || - detail::is_half_or_bf16_v>, - vec &> - operator=(const DataT &Rhs) { - for (int i = 0; i < NumElements; ++i) { - setValue(i, Rhs); - } + !std::is_same_v && std::is_convertible_v, vec &> + operator=(const vec &Rhs) { + *this = Rhs.template as(); return *this; } -#endif // __SYCL_USE_EXT_VECTOR_TYPE__ - -#ifdef __SYCL_USE_EXT_VECTOR_TYPE__ - // Optimized naive constructors with NumElements of DataT values. - // We don't expect compilers to optimize vararg recursive functions well. - - // Helper type to make specific constructors available only for specific - // number of elements. - template - using EnableIfMultipleElems = typename std::enable_if_t< - std::is_convertible_v && NumElements == IdxNum, DataT>; - template - constexpr vec(const EnableIfMultipleElems<2, Ty> Arg0, - const EnableIfNotUsingArrayOnDevice Arg1) - : m_Data{vec_data::get(Arg0), vec_data::get(Arg1)} {} - template - constexpr vec(const EnableIfMultipleElems<3, Ty> Arg0, - const EnableIfNotUsingArrayOnDevice Arg1, const DataT Arg2) - : m_Data{vec_data::get(Arg0), vec_data::get(Arg1), - vec_data::get(Arg2)} {} - template - constexpr vec(const EnableIfMultipleElems<4, Ty> Arg0, - const EnableIfNotUsingArrayOnDevice Arg1, const DataT Arg2, - const Ty Arg3) - : m_Data{vec_data::get(Arg0), vec_data::get(Arg1), - vec_data::get(Arg2), vec_data::get(Arg3)} {} - template - constexpr vec(const EnableIfMultipleElems<8, Ty> Arg0, - const EnableIfNotUsingArrayOnDevice Arg1, const DataT Arg2, - const DataT Arg3, const DataT Arg4, const DataT Arg5, - const DataT Arg6, const DataT Arg7) - : m_Data{vec_data::get(Arg0), vec_data::get(Arg1), - vec_data::get(Arg2), vec_data::get(Arg3), - vec_data::get(Arg4), vec_data::get(Arg5), - vec_data::get(Arg6), vec_data::get(Arg7)} {} - template - constexpr vec(const EnableIfMultipleElems<16, Ty> Arg0, - const EnableIfNotUsingArrayOnDevice Arg1, const DataT Arg2, - const DataT Arg3, const DataT Arg4, const DataT Arg5, - const DataT Arg6, const DataT Arg7, const DataT Arg8, - const DataT Arg9, const DataT ArgA, const DataT ArgB, - const DataT ArgC, const DataT ArgD, const DataT ArgE, - const DataT ArgF) - : m_Data{vec_data::get(Arg0), vec_data::get(Arg1), - vec_data::get(Arg2), vec_data::get(Arg3), - vec_data::get(Arg4), vec_data::get(Arg5), - vec_data::get(Arg6), vec_data::get(Arg7), - vec_data::get(Arg8), vec_data::get(Arg9), - vec_data::get(ArgA), vec_data::get(ArgB), - vec_data::get(ArgC), vec_data::get(ArgD), - vec_data::get(ArgE), vec_data::get(ArgF)} {} -#endif // __SYCL_USE_EXT_VECTOR_TYPE__ - - // Constructor from values of base type or vec of base type. Checks that - // base types are match and that the NumElements == sum of lengths of args. - template , - typename = EnableIfSuitableNumElements> - constexpr vec(const argTN &...args) - : vec{VecArgArrayCreator, argTN...>::Create(args...), - std::make_index_sequence()} {} #ifdef __SYCL_DEVICE_ONLY__ - template && - !std::is_same_v>> - constexpr vec(vector_t openclVector) { - if constexpr (!IsUsingArrayOnDevice) { - m_Data = openclVector; - } else { - m_Data = bit_cast(openclVector); - } - } - - operator vector_t() const { - if constexpr (!IsUsingArrayOnDevice) { - return m_Data; - } else { - auto ptr = bit_cast((&m_Data)->data()); - return *ptr; - } - } + template < + typename vector_t_ = vector_t, + typename = typename std::enable_if_t>> + constexpr vec(vector_t_ openclVector) { + m_Data = sycl::bit_cast(openclVector); + } + + /* @SYCL2020 + * Available only when: compiled for the device. + * Converts this SYCL vec instance to the underlying backend-native vector + * type defined by vector_t. + */ + operator vector_t() const { return sycl::bit_cast(m_Data); } #endif // __SYCL_DEVICE_ONLY__ // Available only when: NumElements == 1 template operator typename std::enable_if_t() const { - return vec_data::get(m_Data); + return m_Data[0]; } __SYCL2020_DEPRECATED("get_count() is deprecated, please use size() instead") @@ -750,86 +407,90 @@ template class vec { static constexpr size_t get_size() { return byte_size(); } static constexpr size_t byte_size() noexcept { return sizeof(m_Data); } + // We interpret bool as int8_t, std::byte as uint8_t for conversion to other + // types. + // clang-format off + template + using ConvertBoolAndByteT = typename detail::map_type< + T, +#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) + std::byte, /*->*/ std::uint8_t, +#endif + bool, /*->*/ std::int8_t, + T, /*->*/ T>::type; + // clang-format on + // convertImpl can't be called with the same From and To types and therefore // we need this version of convert which is mostly no-op. template - std::enable_if_t< - std::is_same_v, vec_data_t> || - std::is_same_v>, - detail::ConvertToOpenCLType_t>>, - vec> - convert() const { - static_assert(std::is_integral_v> || - detail::is_floating_point::value, + vec convert() const { + + using T = ConvertBoolAndByteT; + using R = ConvertBoolAndByteT; + static_assert(std::is_integral_v || detail::is_floating_point::value, "Unsupported convertT"); - if constexpr (!std::is_same_v) { - // Dummy conversion for cases like vec -> vec - vec Result; + + using OpenCLT = detail::ConvertToOpenCLType_t; + using OpenCLR = detail::ConvertToOpenCLType_t; + vec Result; + + // For conversion between bool -> signed char and byte -> uint8_t. + if constexpr (!std::is_same_v && + (std::is_same_v || std::is_same_v)) { for (size_t I = 0; I < NumElements; ++I) Result.setValue(I, static_cast(getValue(I))); - return Result; - } else { - // No conversion necessary + } else if constexpr (std::is_same_v) { return *this; - } - } + } else { - template - std::enable_if_t< - !std::is_same_v, vec_data_t> && - !std::is_same_v>, - detail::ConvertToOpenCLType_t>>, - vec> - convert() const { - static_assert(std::is_integral_v> || - detail::is_floating_point::value, - "Unsupported convertT"); - using T = vec_data_t; - using R = vec_data_t; - using OpenCLT = detail::ConvertToOpenCLType_t; - using OpenCLR = detail::ConvertToOpenCLType_t; - vec Result; +#ifdef __SYCL_DEVICE_ONLY__ + using OpenCLVecT = OpenCLT __attribute__((ext_vector_type(NumElements))); + using OpenCLVecR = OpenCLR __attribute__((ext_vector_type(NumElements))); + + auto NativeVector = sycl::bit_cast(*this); + using ConvertTVecType = typename vec::vector_t; -#if defined(__SYCL_DEVICE_ONLY__) - using OpenCLVecT = OpenCLT __attribute__((ext_vector_type(NumElements))); - using OpenCLVecR = OpenCLR __attribute__((ext_vector_type(NumElements))); - // Whole vector conversion can only be done, if: - constexpr bool canUseNativeVectorConvert = + // Whole vector conversion can only be done, if: + constexpr bool canUseNativeVectorConvert = #ifdef __NVPTX__ - // - we are not on CUDA, see intel/llvm#11840 - false && + // - we are not on CUDA, see intel/llvm#11840 + false && #endif - // - both vectors are represented using native vector types; - NativeVec && vec::NativeVec && - // - vec storage has an equivalent OpenCL native vector it is implicitly - // convertible to. There are some corner cases where it is not the - // case with char, long and long long types. - std::is_convertible_v && - std::is_convertible_v && - // - it is not a signed to unsigned (or vice versa) conversion - // see comments within 'convertImpl' for more details; - !detail::is_sint_to_from_uint::value && - // - destination type is not bool. bool is stored as integer under the - // hood and therefore conversion to bool looks like conversion between - // two integer types. Since bit pattern for true and false is not - // defined, there is no guarantee that integer conversion yields - // right results here; - !std::is_same_v; - if constexpr (canUseNativeVectorConvert) { - Result.m_Data = detail::convertImpl(m_Data); - } else -#endif // defined(__SYCL_DEVICE_ONLY__) - { - // Otherwise, we fallback to per-element conversion: - for (size_t I = 0; I < NumElements; ++I) { - Result.setValue( - I, vec_data::get( - detail::convertImpl( - vec_data::get(getValue(I))))); + NumElements > 1 && + // - vec storage has an equivalent OpenCL native vector it is + // implicitly convertible to. There are some corner cases where it + // is not the case with char, long and long long types. + std::is_convertible_v && + std::is_convertible_v && + // - it is not a signed to unsigned (or vice versa) conversion + // see comments within 'convertImpl' for more details; + !detail::is_sint_to_from_uint::value && + // - destination type is not bool. bool is stored as integer under the + // hood and therefore conversion to bool looks like conversion + // between two integer types. Since bit pattern for true and false + // is not defined, there is no guarantee that integer conversion + // yields right results here; + !std::is_same_v; + + if constexpr (canUseNativeVectorConvert) { + Result.m_Data = sycl::bit_cast( + detail::convertImpl(NativeVector)); + } else +#endif // __SYCL_DEVICE_ONLY__ + { + // Otherwise, we fallback to per-element conversion: + for (size_t I = 0; I < NumElements; ++I) { + auto val = + detail::convertImpl( + getValue(I)); + if constexpr (detail::is_byte_t::value) + Result.setValue(I, static_cast(val)); + else + Result.setValue(I, val); + } } } @@ -859,58 +520,10 @@ template class vec { return this; } - // ext_vector_type is used as an underlying type for sycl::vec on device. - // The problem is that for clang vector types the return of operator[] is a - // temporary and not a reference to the element in the vector. In practice - // reinterpret_cast(&m_Data)[i]; is working. According to - // http://llvm.org/docs/GetElementPtr.html#can-gep-index-into-vector-elements - // this is not disallowed now. But could probably be disallowed in the future. - // That is why tests are added to check that behavior of the compiler has - // not changed. - // // Implement operator [] in the same way for host and device. - // TODO: change host side implementation when underlying type for host side - // will be changed to std::array. - // NOTE: aliasing the incompatible types of bfloat16 may lead to problems if - // aggressively optimized. Specializing with noinline to avoid as workaround. - - template - typename std::enable_if_t, - const DataT &> - operator[](int i) const { - return reinterpret_cast(&m_Data)[i]; - } - - template - typename std::enable_if_t, - DataT &> - operator[](int i) { - return reinterpret_cast(&m_Data)[i]; - } - -#ifdef _MSC_VER -#define __SYCL_NOINLINE_BF16 __declspec(noinline) -#else -#define __SYCL_NOINLINE_BF16 __attribute__((noinline)) -#endif - - template - __SYCL_NOINLINE_BF16 - typename std::enable_if_t, - const DataT &> - operator[](int i) const { - return reinterpret_cast(&m_Data)[i]; - } - - template - __SYCL_NOINLINE_BF16 - typename std::enable_if_t, - DataT &> - operator[](int i) { - return reinterpret_cast(&m_Data)[i]; - } + const DataT &operator[](int i) const { return m_Data[i]; } -#undef __SYCL_NOINLINE_BF16 + DataT &operator[](int i) { return m_Data[i]; } // Begin hi/lo, even/odd, xyzw, and rgba swizzles. private: @@ -961,7 +574,7 @@ template class vec { multi_ptr Ptr) const { for (int I = 0; I < NumElements; I++) { *multi_ptr(Ptr + Offset * NumElements + - I) = getValue(I); + I) = m_Data[I]; } } template class vec { store(Offset, MultiPtr); } - void ConvertToDataT() { +#ifdef __SYCL_DEVICE_ONLY__ + // Require only for std::bool. + inline void ConvertToDataT() { for (size_t i = 0; i < NumElements; ++i) { - DataT tmp = getValue(i); - setValue(i, tmp); + m_Data[i] = bit_cast(m_Data[i]) != 0; } } +#endif -#ifdef __SYCL_BINOP -#error "Undefine __SYCL_BINOP macro" + /******************* sycl::vec math operations ***********************/ +#if defined(__SYCL_BINOP) || defined(BINOP_BASE) +#error "Undefine __SYCL_BINOP and BINOP_BASE macro" #endif -#ifdef __SYCL_USE_EXT_VECTOR_TYPE__ -#define __SYCL_BINOP(BINOP, OPASSIGN, CONVERT) \ - friend vec operator BINOP(const vec &Lhs, const vec &Rhs) { \ +#ifdef __SYCL_DEVICE_ONLY__ +#define BINOP_BASE(BINOP, OPASSIGN, CONVERT, COND) \ + template \ + friend typename std::enable_if_t<(COND), vec> operator BINOP( \ + const vec & Lhs, const vec & Rhs) { \ vec Ret; \ - if constexpr (IsUsingArrayOnDevice) { \ + if constexpr (IsBfloat16) { \ for (size_t I = 0; I < NumElements; ++I) { \ - Ret.setValue(I, (Lhs.getValue(I) BINOP Rhs.getValue(I))); \ + Ret[I] = Lhs[I] BINOP Rhs[I]; \ } \ } else { \ - Ret.m_Data = Lhs.m_Data BINOP Rhs.m_Data; \ - if constexpr (std::is_same_v && CONVERT) { \ + vector_t ExtVecLhs = sycl::bit_cast(Lhs); \ + vector_t ExtVecRhs = sycl::bit_cast(Rhs); \ + Ret = vec(ExtVecLhs BINOP ExtVecRhs); \ + if constexpr (std::is_same_v && CONVERT) { \ Ret.ConvertToDataT(); \ } \ } \ return Ret; \ - } \ - friend vec operator BINOP(const vec &Lhs, const DataT &Rhs) { \ - return Lhs BINOP vec(Rhs); \ - } \ - friend vec operator BINOP(const DataT &Lhs, const vec &Rhs) { \ - return vec(Lhs) BINOP Rhs; \ - } \ - friend vec &operator OPASSIGN(vec & Lhs, const vec & Rhs) { \ - Lhs = Lhs BINOP Rhs; \ - return Lhs; \ - } \ - template \ - friend typename std::enable_if_t operator OPASSIGN( \ - vec & Lhs, const DataT & Rhs) { \ - Lhs = Lhs BINOP vec(Rhs); \ - return Lhs; \ } +#else // __SYCL_DEVICE_ONLY__ -#else // __SYCL_USE_EXT_VECTOR_TYPE__ - -#define __SYCL_BINOP(BINOP, OPASSIGN, CONVERT) \ - friend vec operator BINOP(const vec &Lhs, const vec &Rhs) { \ +#define BINOP_BASE(BINOP, OPASSIGN, CONVERT, COND) \ + template \ + friend typename std::enable_if_t<(COND), vec> operator BINOP( \ + const vec & Lhs, const vec & Rhs) { \ vec Ret{}; \ - if constexpr (NativeVec) \ - Ret.m_Data = Lhs.m_Data BINOP Rhs.m_Data; \ - else \ - for (size_t I = 0; I < NumElements; ++I) \ - Ret.setValue(I, (DataT)(vec_data::get(Lhs.getValue( \ - I)) BINOP vec_data::get(Rhs.getValue(I)))); \ + for (size_t I = 0; I < NumElements; ++I) { \ + Ret[I] = Lhs[I] BINOP Rhs[I]; \ + } \ return Ret; \ - } \ - friend vec operator BINOP(const vec &Lhs, const DataT &Rhs) { \ + } +#endif // __SYCL_DEVICE_ONLY__ + +#define __SYCL_BINOP(BINOP, OPASSIGN, CONVERT, COND) \ + BINOP_BASE(BINOP, OPASSIGN, CONVERT, COND) \ + \ + template \ + friend typename std::enable_if_t<(COND), vec> operator BINOP( \ + const vec & Lhs, const DataT & Rhs) { \ return Lhs BINOP vec(Rhs); \ } \ - friend vec operator BINOP(const DataT &Lhs, const vec &Rhs) { \ + template \ + friend typename std::enable_if_t<(COND), vec> operator BINOP( \ + const DataT & Lhs, const vec & Rhs) { \ return vec(Lhs) BINOP Rhs; \ } \ - friend vec &operator OPASSIGN(vec & Lhs, const vec & Rhs) { \ + template \ + friend typename std::enable_if_t<(COND), vec> &operator OPASSIGN( \ + vec & Lhs, const vec & Rhs) { \ Lhs = Lhs BINOP Rhs; \ return Lhs; \ } \ - template \ - friend typename std::enable_if_t operator OPASSIGN( \ - vec & Lhs, const DataT & Rhs) { \ + template \ + friend typename std::enable_if_t<(Num != 1) && (COND), vec &> \ + operator OPASSIGN(vec & Lhs, const DataT & Rhs) { \ Lhs = Lhs BINOP vec(Rhs); \ return Lhs; \ } -#endif // __SYCL_USE_EXT_VECTOR_TYPE__ + // std::byte is not an arithmetic type. + __SYCL_BINOP(+, +=, true, (!detail::is_byte_v)) + __SYCL_BINOP(-, -=, true, (!detail::is_byte_v)) + __SYCL_BINOP(*, *=, false, (!detail::is_byte_v)) + __SYCL_BINOP(/, /=, false, (!detail::is_byte_v)) + + // The following OPs are available only when: DataT != cl_float && + // DataT != cl_double && DataT != cl_half && DataT != BF16. + __SYCL_BINOP(%, %=, false, + (!detail::is_vgenfloat_v && (!detail::is_byte_v))) + // Bitwise operations are allowed for std::byte. + __SYCL_BINOP(|, |=, false, (!detail::is_vgenfloat_v)) + __SYCL_BINOP(&, &=, false, (!detail::is_vgenfloat_v)) + __SYCL_BINOP(^, ^=, false, (!detail::is_vgenfloat_v)) + __SYCL_BINOP(>>, >>=, false, + (!detail::is_vgenfloat_v && (!detail::is_byte_v))) + __SYCL_BINOP(<<, <<=, true, + (!detail::is_vgenfloat_v && (!detail::is_byte_v))) + +#undef BINOP_BASE +#undef __SYCL_BINOP - __SYCL_BINOP(+, +=, true) - __SYCL_BINOP(-, -=, true) - __SYCL_BINOP(*, *=, false) - __SYCL_BINOP(/, /=, false) + // Special <<, >> operators for std::byte. + // std::byte is not an arithmetic type and it only supports the following + // overloads of >> and << operators. + // + // 1 template + // constexpr std::byte operator<<( std::byte b, IntegerType shift ) + // noexcept; + // 2 template + // constexpr std::byte operator>>( std::byte b, IntegerType shift ) + // noexcept; +#define __SYCL_SHIFT_BYTE(OP, OPASSIGN) \ + template \ + friend typename std::enable_if_t<(detail::is_byte_v), vec> operator OP( \ + const vec & Lhs, int shift) { \ + vec Ret; \ + for (size_t I = 0; I < NumElements; ++I) { \ + Ret[I] = Lhs[I] OP shift; \ + } \ + return Ret; \ + } \ + template \ + friend typename std::enable_if_t<(detail::is_byte_v), vec &> \ + operator OPASSIGN(vec & Lhs, int shift) { \ + Lhs = Lhs OP shift; \ + return Lhs; \ + } - // TODO: The following OPs are available only when: DataT != cl_float && - // DataT != cl_double && DataT != cl_half - __SYCL_BINOP(%, %=, false) - __SYCL_BINOP(|, |=, false) - __SYCL_BINOP(&, &=, false) - __SYCL_BINOP(^, ^=, false) - __SYCL_BINOP(>>, >>=, false) - __SYCL_BINOP(<<, <<=, true) -#undef __SYCL_BINOP -#undef __SYCL_BINOP_HELP + __SYCL_SHIFT_BYTE(<<, <<=) + __SYCL_SHIFT_BYTE(>>, >>=) +#undef __SYCL_SHIFT_BYTE // Note: vec<>/SwizzleOp logical value is 0/-1 logic, as opposed to 0/1 logic. // As far as CTS validation is concerned, 0/-1 logic also applies when @@ -1075,299 +723,227 @@ template class vec { // TODO: Determine if vec<, NumElements=1> is needed at all, remove this // inconsistency if not by disallowing one-element vectors (as in OpenCL) -#ifdef __SYCL_RELLOGOP -#error "Undefine __SYCL_RELLOGOP macro" +#if defined(__SYCL_RELLOGOP) || defined(RELLOGOP_BASE) +#error "Undefine __SYCL_RELLOGOP and RELLOGOP_BASE macro." #endif -// Use __SYCL_DEVICE_ONLY__ macro because cast to OpenCL vector type is defined -// by SYCL device compiler only. + #ifdef __SYCL_DEVICE_ONLY__ -#define __SYCL_RELLOGOP(RELLOGOP) \ - friend vec operator RELLOGOP(const vec & Lhs, \ - const vec & Rhs) { \ +#define RELLOGOP_BASE(RELLOGOP, COND) \ + template \ + friend typename std::enable_if_t<(COND), vec> \ + operator RELLOGOP(const vec & Lhs, const vec & Rhs) { \ vec Ret{}; \ - /* This special case is needed since there are no standard operator|| */ \ - /* or operator&& functions for std::array. */ \ - if constexpr (IsUsingArrayOnDevice && \ - (std::string_view(#RELLOGOP) == "||" || \ - std::string_view(#RELLOGOP) == "&&")) { \ + /* ext_vector_type does not support bfloat16, so for these */ \ + /* we do element-by-element operation on the underlying std::array. */ \ + if constexpr (IsBfloat16) { \ for (size_t I = 0; I < NumElements; ++I) { \ /* We cannot use SetValue here as the operator is not a friend of*/ \ /* Ret on Windows. */ \ - Ret[I] = static_cast(-(vec_data::get( \ - Lhs.getValue(I)) RELLOGOP vec_data::get(Rhs.getValue(I)))); \ + Ret[I] = static_cast(-(Lhs[I] RELLOGOP Rhs[I])); \ } \ } else { \ + vector_t ExtVecLhs = sycl::bit_cast(Lhs); \ + vector_t ExtVecRhs = sycl::bit_cast(Rhs); \ + /* Cast required to convert unsigned char ext_vec_type to */ \ + /* char ext_vec_type. */ \ Ret = vec( \ (typename vec::vector_t)( \ - Lhs.m_Data RELLOGOP Rhs.m_Data)); \ - if (NumElements == 1) /*Scalar 0/1 logic was applied, invert*/ \ + ExtVecLhs RELLOGOP ExtVecRhs)); \ + /* For NumElements == 1, we use scalar instead of ext_vector_type. */ \ + if constexpr (NumElements == 1) { \ Ret *= -1; \ + } \ } \ return Ret; \ - } \ - friend vec operator RELLOGOP(const vec & Lhs, \ - const DataT & Rhs) { \ - return Lhs RELLOGOP vec(Rhs); \ - } \ - friend vec operator RELLOGOP(const DataT & Lhs, \ - const vec & Rhs) { \ - return vec(Lhs) RELLOGOP Rhs; \ } - -#else -#define __SYCL_RELLOGOP(RELLOGOP) \ - friend vec operator RELLOGOP(const vec & Lhs, \ - const vec & Rhs) { \ +#else // __SYCL_DEVICE_ONLY__ +#define RELLOGOP_BASE(RELLOGOP, COND) \ + template \ + friend typename std::enable_if_t<(COND), vec> \ + operator RELLOGOP(const vec & Lhs, const vec & Rhs) { \ + \ vec Ret{}; \ for (size_t I = 0; I < NumElements; ++I) { \ - /* We cannot use SetValue here as the operator is not a friend of*/ \ - /* Ret on Windows. */ \ - Ret[I] = static_cast(-(vec_data::get( \ - Lhs.getValue(I)) RELLOGOP vec_data::get(Rhs.getValue(I)))); \ + Ret[I] = static_cast(-(Lhs[I] RELLOGOP Rhs[I])); \ } \ return Ret; \ - } \ - friend vec operator RELLOGOP(const vec & Lhs, \ - const DataT & Rhs) { \ + } +#endif + +#define __SYCL_RELLOGOP(RELLOGOP, COND) \ + RELLOGOP_BASE(RELLOGOP, COND) \ + \ + template \ + friend typename std::enable_if_t<(COND), vec> \ + operator RELLOGOP(const vec & Lhs, const DataT & Rhs) { \ return Lhs RELLOGOP vec(Rhs); \ } \ - friend vec operator RELLOGOP(const DataT & Lhs, \ - const vec & Rhs) { \ + template \ + friend typename std::enable_if_t<(COND), vec> \ + operator RELLOGOP(const DataT & Lhs, const vec & Rhs) { \ return vec(Lhs) RELLOGOP Rhs; \ } -#endif - __SYCL_RELLOGOP(==) - __SYCL_RELLOGOP(!=) - __SYCL_RELLOGOP(>) - __SYCL_RELLOGOP(<) - __SYCL_RELLOGOP(>=) - __SYCL_RELLOGOP(<=) - // TODO: limit to integral types. - __SYCL_RELLOGOP(&&) - __SYCL_RELLOGOP(||) + // OP is: ==, !=, <, >, <=, >=, &&, || + // vec operatorOP(const vec &Rhs) const; + // vec operatorOP(const DataT &Rhs) const; + __SYCL_RELLOGOP(==, (!detail::is_byte_v)) + __SYCL_RELLOGOP(!=, (!detail::is_byte_v)) + __SYCL_RELLOGOP(>, (!detail::is_byte_v)) + __SYCL_RELLOGOP(<, (!detail::is_byte_v)) + __SYCL_RELLOGOP(>=, (!detail::is_byte_v)) + __SYCL_RELLOGOP(<=, (!detail::is_byte_v)) + + // Only available to integral types. + __SYCL_RELLOGOP(&&, (!detail::is_vgenfloat_v) && (!detail::is_byte_v)) + __SYCL_RELLOGOP(||, (!detail::is_vgenfloat_v) && (!detail::is_byte_v)) #undef __SYCL_RELLOGOP +// ++ and -- operators are only allowed for DataT!=std::byte and DataT!=bool +// FIXME: Don't allow Unary operators on vec after +// https://github.com/KhronosGroup/SYCL-CTS/issues/896 gets fixed. #ifdef __SYCL_UOP #error "Undefine __SYCL_UOP macro" #endif -#define __SYCL_UOP(UOP, OPASSIGN) \ - friend vec &operator UOP(vec & Rhs) { \ - Rhs OPASSIGN vec_data::get(1); \ +#define __SYCL_UOP(UOP, OPASSIGN, COND) \ + template \ + friend typename std::enable_if_t<(COND), vec &> operator UOP(vec & Rhs) { \ + Rhs OPASSIGN DataT{1}; \ return Rhs; \ } \ - friend vec operator UOP(vec &Lhs, int) { \ + template \ + friend typename std::enable_if_t<(COND), vec> operator UOP(vec & Lhs, int) { \ vec Ret(Lhs); \ - Lhs OPASSIGN vec_data::get(1); \ + Lhs OPASSIGN DataT{1}; \ return Ret; \ } - __SYCL_UOP(++, +=) - __SYCL_UOP(--, -=) + __SYCL_UOP(++, +=, (!detail::is_byte_v)) + __SYCL_UOP(--, -=, (!detail::is_byte_v)) #undef __SYCL_UOP // operator~() available only when: dataT != float && dataT != double // && dataT != half - friend vec operator~(const vec &Rhs) { - if constexpr (IsUsingArrayOnDevice || IsUsingArrayOnHost) { - vec Ret{}; - for (size_t I = 0; I < NumElements; ++I) { - Ret.setValue(I, ~Rhs.getValue(I)); - } - return Ret; - } else { - vec Ret{(typename vec::DataType) ~Rhs.m_Data}; - if constexpr (std::is_same_v) { - Ret.ConvertToDataT(); - } - return Ret; + template + friend typename std::enable_if_t, vec> + operator~(const vec &Rhs) { +#ifdef __SYCL_DEVICE_ONLY__ + auto extVec = sycl::bit_cast(Rhs); + vec Ret{~extVec}; + if constexpr (std::is_same_v) { + Ret.ConvertToDataT(); } + return Ret; +#else + vec Ret{}; + for (size_t I = 0; I < NumElements; ++I) { + Ret[I] = ~Rhs[I]; + } + return Ret; +#endif } - // operator! - friend vec, NumElements> operator!(const vec &Rhs) { - if constexpr (IsUsingArrayOnDevice || IsUsingArrayOnHost) { - vec Ret{}; + // operator!. Not available for std::byte. + template + friend typename std::enable_if_t<(!detail::is_byte_v), + vec, NumElements>> + operator!(const vec &Rhs) { +#ifdef __SYCL_DEVICE_ONLY__ + if constexpr (!std::is_same_v) { + auto extVec = sycl::bit_cast(Rhs); + vec, NumElements> Ret{ + (typename vec::vector_t) !extVec}; + return Ret; + } else +#endif // __SYCL_DEVICE_ONLY__ + { + vec, NumElements> Ret{}; for (size_t I = 0; I < NumElements; ++I) { -#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) - // std::byte neither supports ! unary op or casting, so special handling - // is needed. And, worse, Windows has a conflict with 'byte'. - if constexpr (std::is_same_v) { - Ret.setValue(I, std::byte{!vec_data::get(Rhs.getValue(I))}); - } else -#endif // (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) - { - Ret.setValue(I, !vec_data::get(Rhs.getValue(I))); - } + // static_cast will work here as the output of ! operator is either 0 or + // -1. + Ret[I] = static_cast>(-1 * (!Rhs[I])); } - return Ret.template as, NumElements>>(); - } else { - return vec{(typename vec::DataType) !Rhs.m_Data} - .template as, NumElements>>(); + return Ret; } } - // operator + - friend vec operator+(const vec &Lhs) { - if constexpr (IsUsingArrayOnDevice || IsUsingArrayOnHost) { - vec Ret{}; - for (size_t I = 0; I < NumElements; ++I) - Ret.setValue( - I, vec_data::get(+vec_data::get(Lhs.getValue(I)))); - return Ret; - } else { - return vec{+Lhs.m_Data}; - } + // operator +. Not available for std::byte as it is not an arithmetic type. + template + friend typename std::enable_if_t<(!detail::is_byte_v), vec> + operator+(const vec &Lhs) { +#ifdef __SYCL_DEVICE_ONLY__ + auto extVec = sycl::bit_cast(Lhs); + return vec{+extVec}; +#else + vec Ret{}; + for (size_t I = 0; I < NumElements; ++I) + Ret[I] = +Lhs[I]; + return Ret; +#endif } - // operator - - friend vec operator-(const vec &Lhs) { + // operator -. Not available for std::byte as it is not an arithmetic type. + template + friend typename std::enable_if_t<(!detail::is_byte_v), vec> + operator-(const vec &Lhs) { namespace oneapi = sycl::ext::oneapi; vec Ret{}; - if constexpr (IsBfloat16 && NumElements == 1) { - oneapi::bfloat16 v = oneapi::detail::bitsToBfloat16(Lhs.m_Data); - oneapi::bfloat16 w = -v; - Ret.m_Data = oneapi::detail::bfloat16ToBits(w); - } else if constexpr (IsBfloat16) { - for (size_t I = 0; I < NumElements; I++) { - oneapi::bfloat16 v = oneapi::detail::bitsToBfloat16(Lhs.m_Data[I]); - oneapi::bfloat16 w = -v; - Ret.m_Data[I] = oneapi::detail::bfloat16ToBits(w); - } - } else if constexpr (IsUsingArrayOnDevice || IsUsingArrayOnHost) { - for (size_t I = 0; I < NumElements; ++I) - Ret.setValue( - I, vec_data::get(-vec_data::get(Lhs.getValue(I)))); - return Ret; + if constexpr (IsBfloat16) { + for (size_t I = 0; I < NumElements; I++) + Ret[I] = -Lhs[I]; } else { - Ret = vec{-Lhs.m_Data}; - if constexpr (std::is_same_v) { +#ifndef __SYCL_DEVICE_ONLY__ + for (size_t I = 0; I < NumElements; ++I) + Ret[I] = -Lhs[I]; +#else + auto extVec = sycl::bit_cast(Lhs); + Ret = vec{-extVec}; + if constexpr (std::is_same_v) { Ret.ConvertToDataT(); } - return Ret; +#endif } + return Ret; } - // OP is: &&, || - // vec operatorOP(const vec &Rhs) const; - // vec operatorOP(const DataT &Rhs) const; - - // OP is: ==, !=, <, >, <=, >= - // vec operatorOP(const vec &Rhs) const; - // vec operatorOP(const DataT &Rhs) const; private: - // Generic method that execute "Operation" on underlying values. - -#ifdef __SYCL_USE_EXT_VECTOR_TYPE__ - template