Skip to content

Commit

Permalink
[SYCL] Use pair of native::sin/cos for sincos under __FAST_MATH__ (#1…
Browse files Browse the repository at this point in the history
  • Loading branch information
aelovikov-intel authored Aug 8, 2023
1 parent 5eef8c7 commit bd81fc4
Showing 1 changed file with 19 additions and 2 deletions.
21 changes: 19 additions & 2 deletions sycl/include/sycl/builtins.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -734,8 +734,8 @@ std::enable_if_t<__FAST_MATH_GENFLOAT(T), T> sin(T x) __NOEXC {

// svgenfloat sincos (svgenfloat x, genfloatptr cosval)
template <typename T, typename T2>
std::enable_if_t<
detail::is_svgenfloat<T>::value && detail::is_genfloatptr<T2>::value, T>
std::enable_if_t<__FAST_MATH_GENFLOAT(T) && detail::is_genfloatptr<T2>::value,
T>
sincos(T x, T2 cosval) __NOEXC {
detail::check_vector_size<T, T2>();
return __sycl_std::__invoke_sincos<T>(x, cosval);
Expand Down Expand Up @@ -2500,6 +2500,23 @@ std::enable_if_t<detail::is_svgenfloatf<T>::value, T> cos(T x) __NOEXC {
return native::cos(x);
}

// svgenfloatf sincos (svgenfloatf x, genfloatptr cosval)
// Fast-math overload: the sine is returned and the cosine is stored through
// cosval, each computed by its native:: counterpart. The goal is purely
// performance - sincos must not end up slower than issuing sin and cos
// separately. In theory the non-native sincos could beat a native::sin plus
// native::cos pair, and choosing between them would take some kind of cost
// model (ideally living entirely in the JIT/AOT compilers). In practice this
// simpler approach seems to work just fine, and it mirrors how sin/cos above
// are optimized for the fast math path.
template <typename T, typename T2>
std::enable_if_t<
    detail::is_svgenfloatf<T>::value && detail::is_genfloatptr<T2>::value, T>
sincos(T x, T2 cosval) __NOEXC {
  detail::check_vector_size<T, T2>();
  // x is a by-value copy, so writing through cosval cannot alias it; the
  // order of the two native calls therefore does not affect the results.
  T sinval = native::sin(x);
  *cosval = native::cos(x);
  return sinval;
}

// svgenfloatf exp (svgenfloatf x)
template <typename T>
std::enable_if_t<detail::is_svgenfloatf<T>::value, T> exp(T x) __NOEXC {
Expand Down

0 comments on commit bd81fc4

Please sign in to comment.