Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[clang][x86] Add constexpr support for some basic SSE1 intrinsics #111001

Merged
merged 1 commit into from
Oct 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 58 additions & 72 deletions clang/lib/Headers/xmmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,14 @@ typedef unsigned int __v4su __attribute__((__vector_size__(16)));
__min_vector_width__(128)))
#endif

/* Constexpr-capable variants of the default function attributes: the
   constexpr keyword is appended only when compiling as C++11 or later;
   otherwise these expand to the plain attribute macros. */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
#define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2 constexpr
#else
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
#define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2
#endif

#define __trunc64(x) \
(__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
#define __zext128(x) \
Expand Down Expand Up @@ -75,9 +83,8 @@ typedef unsigned int __v4su __attribute__((__vector_size__(16)));
/// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the sum
/// of the lower 32 bits of both operands. The upper 96 bits are copied from
/// the upper 96 bits of the first source operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_add_ss(__m128 __a, __m128 __b)
{
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_add_ss(__m128 __a, __m128 __b) {
  /* Only element 0 is summed; __a is a by-value copy, so its remaining
     elements are returned unchanged. */
  __a[0] = __a[0] + __b[0];
  return __a;
}
Expand All @@ -95,9 +102,8 @@ _mm_add_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float] containing one of the source operands.
/// \returns A 128-bit vector of [4 x float] containing the sums of both
/// operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_add_ps(__m128 __a, __m128 __b)
{
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_add_ps(__m128 __a, __m128 __b) {
/* Add the operands as __v4sf vectors, then cast the sum back to __m128. */
return (__m128)((__v4sf)__a + (__v4sf)__b);
}

Expand All @@ -117,9 +123,8 @@ _mm_add_ps(__m128 __a, __m128 __b)
/// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the
/// difference of the lower 32 bits of both operands. The upper 96 bits are
/// copied from the upper 96 bits of the first source operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_sub_ss(__m128 __a, __m128 __b)
{
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_sub_ss(__m128 __a, __m128 __b) {
  /* Only element 0 is subtracted; __a is a by-value copy, so its remaining
     elements are returned unchanged. */
  __a[0] = __a[0] - __b[0];
  return __a;
}
Expand All @@ -138,9 +143,8 @@ _mm_sub_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float] containing the subtrahend.
/// \returns A 128-bit vector of [4 x float] containing the differences between
/// both operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_sub_ps(__m128 __a, __m128 __b)
{
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_sub_ps(__m128 __a, __m128 __b) {
/* Subtract __b from __a as __v4sf vectors, then cast the difference back to
   __m128. */
return (__m128)((__v4sf)__a - (__v4sf)__b);
}

Expand All @@ -160,9 +164,8 @@ _mm_sub_ps(__m128 __a, __m128 __b)
/// \returns A 128-bit vector of [4 x float] containing the product of the lower
/// 32 bits of both operands. The upper 96 bits are copied from the upper 96
/// bits of the first source operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mul_ss(__m128 __a, __m128 __b)
{
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_mul_ss(__m128 __a, __m128 __b) {
  /* Only element 0 is multiplied; __a is a by-value copy, so its remaining
     elements are returned unchanged. */
  __a[0] = __a[0] * __b[0];
  return __a;
}
Expand All @@ -180,9 +183,8 @@ _mm_mul_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float] containing one of the source operands.
/// \returns A 128-bit vector of [4 x float] containing the products of both
/// operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mul_ps(__m128 __a, __m128 __b)
{
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_mul_ps(__m128 __a, __m128 __b) {
/* Multiply the operands as __v4sf vectors, then cast the product back to
   __m128. */
return (__m128)((__v4sf)__a * (__v4sf)__b);
}

Expand All @@ -202,9 +204,8 @@ _mm_mul_ps(__m128 __a, __m128 __b)
/// \returns A 128-bit vector of [4 x float] containing the quotients of the
/// lower 32 bits of both operands. The upper 96 bits are copied from the
/// upper 96 bits of the first source operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_div_ss(__m128 __a, __m128 __b)
{
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_div_ss(__m128 __a, __m128 __b) {
  /* Only element 0 is divided (__a[0] by __b[0]); __a is a by-value copy, so
     its remaining elements are returned unchanged. */
  __a[0] = __a[0] / __b[0];
  return __a;
}
Expand All @@ -221,9 +222,8 @@ _mm_div_ss(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float] containing the divisor.
/// \returns A 128-bit vector of [4 x float] containing the quotients of both
/// operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_div_ps(__m128 __a, __m128 __b)
{
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_div_ps(__m128 __a, __m128 __b) {
/* Divide __a by __b as __v4sf vectors, then cast the quotient back to
   __m128. */
return (__m128)((__v4sf)__a / (__v4sf)__b);
}

Expand Down Expand Up @@ -437,9 +437,8 @@ _mm_max_ps(__m128 __a, __m128 __b)
/// A 128-bit vector containing one of the source operands.
/// \returns A 128-bit vector of [4 x float] containing the bitwise AND of the
/// values between both operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_and_ps(__m128 __a, __m128 __b)
{
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_and_ps(__m128 __a, __m128 __b) {
/* The AND is computed on the operands cast to __v4su (unsigned int vector),
   and the result is cast back to __m128. */
return (__m128)((__v4su)__a & (__v4su)__b);
}

Expand All @@ -459,9 +458,8 @@ _mm_and_ps(__m128 __a, __m128 __b)
/// \returns A 128-bit vector of [4 x float] containing the bitwise AND of the
/// one's complement of the first operand and the values in the second
/// operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_andnot_ps(__m128 __a, __m128 __b)
{
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_andnot_ps(__m128 __a, __m128 __b) {
/* The complement applies to __a only; it is then ANDed with __b, both cast to
   __v4su (unsigned int vector), and the result is cast back to __m128. */
return (__m128)(~(__v4su)__a & (__v4su)__b);
}

Expand All @@ -477,9 +475,8 @@ _mm_andnot_ps(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float] containing one of the source operands.
/// \returns A 128-bit vector of [4 x float] containing the bitwise OR of the
/// values between both operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_or_ps(__m128 __a, __m128 __b)
{
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_or_ps(__m128 __a, __m128 __b) {
/* The OR is computed on the operands cast to __v4su (unsigned int vector),
   and the result is cast back to __m128. */
return (__m128)((__v4su)__a | (__v4su)__b);
}

Expand All @@ -496,9 +493,8 @@ _mm_or_ps(__m128 __a, __m128 __b)
/// A 128-bit vector of [4 x float] containing one of the source operands.
/// \returns A 128-bit vector of [4 x float] containing the bitwise exclusive OR
/// of the values between both operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_xor_ps(__m128 __a, __m128 __b)
{
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_xor_ps(__m128 __a, __m128 __b) {
/* The XOR is computed on the operands cast to __v4su (unsigned int vector),
   and the result is cast back to __m128. */
return (__m128)((__v4su)__a ^ (__v4su)__b);
}

Expand Down Expand Up @@ -1738,9 +1734,8 @@ _mm_cvt_pi2ps(__m128 __a, __m64 __b)
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
/// used in the extraction.
/// \returns A 32-bit float containing the extracted value.
static __inline__ float __DEFAULT_FN_ATTRS
_mm_cvtss_f32(__m128 __a)
{
static __inline__ float __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_cvtss_f32(__m128 __a) {
/* Return element 0 of the vector as a scalar float. */
return __a[0];
}

Expand Down Expand Up @@ -1931,9 +1926,8 @@ _mm_undefined_ps(void)
/// \returns An initialized 128-bit floating-point vector of [4 x float]. The
/// lower 32 bits contain the value provided in the source operand. The
/// upper 96 bits are set to zero.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_set_ss(float __w)
{
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_set_ss(float __w) {
/* Element 0 takes __w; the other three elements are explicitly 0.0f. */
return __extension__ (__m128){ __w, 0.0f, 0.0f, 0.0f };
}

Expand All @@ -1949,9 +1943,8 @@ _mm_set_ss(float __w)
/// A single-precision floating-point value used to initialize each vector
/// element of the result.
/// \returns An initialized 128-bit floating-point vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_set1_ps(float __w)
{
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_set1_ps(float __w) {
/* All four elements are initialized from the same value __w. */
return __extension__ (__m128){ __w, __w, __w, __w };
}

Expand All @@ -1968,9 +1961,8 @@ _mm_set1_ps(float __w)
/// A single-precision floating-point value used to initialize each vector
/// element of the result.
/// \returns An initialized 128-bit floating-point vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_set_ps1(float __w)
{
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_set_ps1(float __w) {
/* Forwards to _mm_set1_ps, so the result is identical to calling it
   directly. */
return _mm_set1_ps(__w);
}

Expand All @@ -1995,9 +1987,8 @@ _mm_set_ps1(float __w)
/// A single-precision floating-point value used to initialize bits [31:0]
/// of the result.
/// \returns An initialized 128-bit floating-point vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_set_ps(float __z, float __y, float __x, float __w)
{
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_set_ps(float __z, float __y, float __x, float __w) {
/* The initializer fills elements from index 0 upward, so the arguments
   appear in reverse order: __w becomes element 0 and __z element 3. */
return __extension__ (__m128){ __w, __x, __y, __z };
}

Expand All @@ -2023,9 +2014,8 @@ _mm_set_ps(float __z, float __y, float __x, float __w)
/// A single-precision floating-point value used to initialize bits [127:96]
/// of the result.
/// \returns An initialized 128-bit floating-point vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_setr_ps(float __z, float __y, float __x, float __w)
{
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_setr_ps(float __z, float __y, float __x, float __w) {
/* Arguments are stored in the order given: __z becomes element 0 and __w
   element 3 (the reverse of _mm_set_ps's layout). */
return __extension__ (__m128){ __z, __y, __x, __w };
}

Expand All @@ -2038,9 +2028,8 @@ _mm_setr_ps(float __z, float __y, float __x, float __w)
///
/// \returns An initialized 128-bit floating-point vector of [4 x float] with
/// all elements set to zero.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_setzero_ps(void)
{
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_setzero_ps(void) {
/* All four elements are explicitly initialized to 0.0f. */
return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f };
}

Expand Down Expand Up @@ -2786,9 +2775,8 @@ void _mm_setcsr(unsigned int __i);
/// Bits [95:64] are written to bits [63:32] of the destination. \n
/// Bits [127:96] are written to bits [127:96] of the destination.
/// \returns A 128-bit vector of [4 x float] containing the interleaved values.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_unpackhi_ps(__m128 __a, __m128 __b)
{
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_unpackhi_ps(__m128 __a, __m128 __b) {
/* Shuffle indices 0-3 name elements of __a and 4-7 name elements of __b, so
   the result is { __a[2], __b[2], __a[3], __b[3] }. */
return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 2, 6, 3, 7);
}

Expand All @@ -2808,9 +2796,8 @@ _mm_unpackhi_ps(__m128 __a, __m128 __b)
/// Bits [31:0] are written to bits [63:32] of the destination. \n
/// Bits [63:32] are written to bits [127:96] of the destination.
/// \returns A 128-bit vector of [4 x float] containing the interleaved values.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_unpacklo_ps(__m128 __a, __m128 __b)
{
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_unpacklo_ps(__m128 __a, __m128 __b) {
/* Shuffle indices 0-3 name elements of __a and 4-7 name elements of __b, so
   the result is { __a[0], __b[0], __a[1], __b[1] }. */
return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 4, 1, 5);
}

Expand All @@ -2830,9 +2817,8 @@ _mm_unpacklo_ps(__m128 __a, __m128 __b)
/// A 128-bit floating-point vector of [4 x float]. The lower 32 bits are
/// written to the lower 32 bits of the result.
/// \returns A 128-bit floating-point vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_move_ss(__m128 __a, __m128 __b)
{
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_move_ss(__m128 __a, __m128 __b) {
/* Overwrite element 0 of the by-value copy __a with element 0 of __b; the
   other elements of __a are returned unchanged. */
__a[0] = __b[0];
return __a;
}
Expand All @@ -2852,9 +2838,8 @@ _mm_move_ss(__m128 __a, __m128 __b)
/// A 128-bit floating-point vector of [4 x float]. The upper 64 bits are
/// written to the lower 64 bits of the result.
/// \returns A 128-bit floating-point vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_movehl_ps(__m128 __a, __m128 __b)
{
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_movehl_ps(__m128 __a, __m128 __b) {
/* Shuffle indices 0-3 name elements of __a and 4-7 name elements of __b, so
   the result is { __b[2], __b[3], __a[2], __a[3] }. */
return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 6, 7, 2, 3);
}

Expand All @@ -2873,9 +2858,8 @@ _mm_movehl_ps(__m128 __a, __m128 __b)
/// A 128-bit floating-point vector of [4 x float]. The lower 64 bits are
/// written to the upper 64 bits of the result.
/// \returns A 128-bit floating-point vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_movelh_ps(__m128 __a, __m128 __b)
{
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_movelh_ps(__m128 __a, __m128 __b) {
/* Shuffle indices 0-3 name elements of __a and 4-7 name elements of __b, so
   the result is { __a[0], __a[1], __b[0], __b[1] }. */
return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 1, 4, 5);
}

Expand Down Expand Up @@ -3207,7 +3191,9 @@ do { \
#undef __anyext128
#undef __zeroupper64
/* Undefine the function-attribute helper macros, including the CONSTEXPR
   variants, so they do not remain defined past this point. */
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS_CONSTEXPR
#undef __DEFAULT_FN_ATTRS_SSE2
#undef __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR

/* Ugly hack for backwards-compatibility (compatible with gcc) */
#if defined(__SSE2__) && !__building_module(_Builtin_intrinsics)
Expand Down
Loading
Loading