From ef168511f8f966043c6bb9f4e2a713348b2aa6ef Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 10 Oct 2024 12:49:35 +0100 Subject: [PATCH] [clang][x86] Add constexpr support for _mm_cvtsi32_ss/_mm_cvt_si2ss/_mm_cvtsi64_ss SSE1 intrinsics Followup to #111001 --- clang/lib/Headers/xmmintrin.h | 15 ++++++--------- clang/test/CodeGen/X86/sse-builtins.c | 9 +++++++++ 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index 2aa688adefc25..20e66d190113a 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -1618,9 +1618,8 @@ _mm_cvtt_ps2pi(__m128 __a) /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the /// converted value of the second operand. The upper 96 bits are copied from /// the upper 96 bits of the first operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_cvtsi32_ss(__m128 __a, int __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtsi32_ss(__m128 __a, + int __b) { __a[0] = __b; return __a; } @@ -1641,9 +1640,8 @@ _mm_cvtsi32_ss(__m128 __a, int __b) /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the /// converted value of the second operand. The upper 96 bits are copied from /// the upper 96 bits of the first operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_cvt_si2ss(__m128 __a, int __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvt_si2ss(__m128 __a, + int __b) { return _mm_cvtsi32_ss(__a, __b); } @@ -1665,9 +1663,8 @@ _mm_cvt_si2ss(__m128 __a, int __b) /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the /// converted value of the second operand. The upper 96 bits are copied from /// the upper 96 bits of the first operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_cvtsi64_ss(__m128 __a, long long __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtsi64_ss(__m128 __a, long long __b) { __a[0] = __b; return __a; } diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c index 932d6f36b09b6..391e049a6ae3e 100644 --- a/clang/test/CodeGen/X86/sse-builtins.c +++ b/clang/test/CodeGen/X86/sse-builtins.c @@ -948,6 +948,15 @@ void test_constexpr() { constexpr __m128 v_mm_movelh_ps = _mm_movelh_ps(k1, k2); static_assert(v_mm_movelh_ps[0] == +1.0f && v_mm_movelh_ps[1] == +0.0f && v_mm_movelh_ps[2] == +8.0f && v_mm_movelh_ps[3] == +4.0f); + constexpr __m128 v_mm_cvtsi32_ss = _mm_cvtsi32_ss(k1, 42); + static_assert(v_mm_cvtsi32_ss[0] == 42.0f && v_mm_cvtsi32_ss[1] == +0.0f && v_mm_cvtsi32_ss[2] == +2.0f && v_mm_cvtsi32_ss[3] == +4.0f); + + constexpr __m128 v_mm_cvt_si2ss = _mm_cvt_si2ss(k2, -99); + static_assert(v_mm_cvt_si2ss[0] == -99.0f && v_mm_cvt_si2ss[1] == +4.0f && v_mm_cvt_si2ss[2] == +2.0f && v_mm_cvt_si2ss[3] == +1.0f); + + constexpr __m128 v_mm_cvtsi64_ss = _mm_cvtsi64_ss(k3, 555); + static_assert(v_mm_cvtsi64_ss[0] == 555.0f && v_mm_cvtsi64_ss[1] == -5.0f && v_mm_cvtsi64_ss[2] == +6.0f && v_mm_cvtsi64_ss[3] == +7.0f); + static_assert(_mm_cvtss_f32(k2) == +8.0f); }