From 36c119490630846c1fa0f427cc60837fd7b40a28 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Mon, 28 Oct 2024 15:34:56 -0700 Subject: [PATCH] Remove optimization flags from clang codegen tests (#113714) - Remove an -O3 flag from a couple of clang x86 codegen tests so the tests do not need to be updated when optimizations in LLVM change. - Change the tests to use utils/update_cc_test_checks.py - Change from apple/darwin triples to generic x86_64-- and i386-- because it was not relevant to the test but `update_cc_test_checks` seems to be unable to handle platforms that prepend `_` to function names. --- clang/test/CodeGen/X86/avx-cmp-builtins.c | 104 ++++-- clang/test/CodeGen/X86/avx-shuffle-builtins.c | 302 +++++++++++++----- clang/test/CodeGen/X86/sse.c | 58 +++- 3 files changed, 348 insertions(+), 116 deletions(-) diff --git a/clang/test/CodeGen/X86/avx-cmp-builtins.c b/clang/test/CodeGen/X86/avx-cmp-builtins.c index c4e3c7ccd54988..2e4a383a6b3fca 100644 --- a/clang/test/CodeGen/X86/avx-cmp-builtins.c +++ b/clang/test/CodeGen/X86/avx-cmp-builtins.c @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -O3 -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -O3 -triple=i386-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s -// FIXME: The shufflevector instructions in test_cmpgt_sd are relying on O3 here. 
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-- -target-feature +avx -disable-O0-optnone -emit-llvm -o - | opt -S -passes=mem2reg | FileCheck %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-- -target-feature +avx -disable-O0-optnone -emit-llvm -o - | opt -S -passes=mem2reg | FileCheck %s #include @@ -9,62 +9,124 @@ // Test LLVM IR codegen of cmpXY instructions // +// CHECK-LABEL: define dso_local <2 x double> @test_cmp_sd( +// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[A]], <2 x double> [[B]], i8 13) +// CHECK-NEXT: ret <2 x double> [[TMP0]] +// __m128d test_cmp_sd(__m128d a, __m128d b) { // Expects that the third argument in LLVM IR is immediate expression - // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 13) return _mm_cmp_sd(a, b, _CMP_GE_OS); } +// CHECK-LABEL: define dso_local <4 x float> @test_cmp_ss( +// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[A]], <4 x float> [[B]], i8 13) +// CHECK-NEXT: ret <4 x float> [[TMP0]] +// __m128 test_cmp_ss(__m128 a, __m128 b) { // Expects that the third argument in LLVM IR is immediate expression - // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 13) return _mm_cmp_ss(a, b, _CMP_GE_OS); } +// CHECK-LABEL: define dso_local <4 x float> @test_cmpgt_ss( +// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[B]], <4 x float> [[A]], i8 1) +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], 
<4 x float> [[TMP0]], <4 x i32> +// CHECK-NEXT: ret <4 x float> [[SHUFFLE_I]] +// __m128 test_cmpgt_ss(__m128 a, __m128 b) { - // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 1) - // CHECK: shufflevector <{{.*}}, <4 x i32> return _mm_cmpgt_ss(a, b); } +// CHECK-LABEL: define dso_local <4 x float> @test_cmpge_ss( +// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[B]], <4 x float> [[A]], i8 2) +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[TMP0]], <4 x i32> +// CHECK-NEXT: ret <4 x float> [[SHUFFLE_I]] +// __m128 test_cmpge_ss(__m128 a, __m128 b) { - // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 2) - // CHECK: shufflevector <{{.*}}, <4 x i32> return _mm_cmpge_ss(a, b); } +// CHECK-LABEL: define dso_local <4 x float> @test_cmpngt_ss( +// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[B]], <4 x float> [[A]], i8 5) +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[TMP0]], <4 x i32> +// CHECK-NEXT: ret <4 x float> [[SHUFFLE_I]] +// __m128 test_cmpngt_ss(__m128 a, __m128 b) { - // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 5) - // CHECK: shufflevector <{{.*}}, <4 x i32> return _mm_cmpngt_ss(a, b); } +// CHECK-LABEL: define dso_local <4 x float> @test_cmpnge_ss( +// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[B]], <4 x float> [[A]], i8 6) +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[TMP0]], <4 x i32> +// CHECK-NEXT: ret <4 x float> [[SHUFFLE_I]] +// __m128 test_cmpnge_ss(__m128 a, __m128 b) { - // CHECK: 
@llvm.x86.sse.cmp.ss({{.*}}, i8 6) - // CHECK: shufflevector <{{.*}}, <4 x i32> return _mm_cmpnge_ss(a, b); } +// CHECK-LABEL: define dso_local <2 x double> @test_cmpgt_sd( +// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[B]], <2 x double> [[A]], i8 1) +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP0]], i32 0 +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[VECEXT_I]], i32 0 +// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x double> [[A]], i32 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[VECEXT1_I]], i32 1 +// CHECK-NEXT: ret <2 x double> [[VECINIT2_I]] +// __m128d test_cmpgt_sd(__m128d a, __m128d b) { - // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 1) - // CHECK: shufflevector <{{.*}}, <2 x i32> return _mm_cmpgt_sd(a, b); } +// CHECK-LABEL: define dso_local <2 x double> @test_cmpge_sd( +// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[B]], <2 x double> [[A]], i8 2) +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP0]], i32 0 +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[VECEXT_I]], i32 0 +// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x double> [[A]], i32 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[VECEXT1_I]], i32 1 +// CHECK-NEXT: ret <2 x double> [[VECINIT2_I]] +// __m128d test_cmpge_sd(__m128d a, __m128d b) { - // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 2) - // CHECK: shufflevector <{{.*}}, <2 x i32> return _mm_cmpge_sd(a, b); } +// CHECK-LABEL: define dso_local <2 x double> @test_cmpngt_sd( +// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x 
double> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[B]], <2 x double> [[A]], i8 5) +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP0]], i32 0 +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[VECEXT_I]], i32 0 +// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x double> [[A]], i32 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[VECEXT1_I]], i32 1 +// CHECK-NEXT: ret <2 x double> [[VECINIT2_I]] +// __m128d test_cmpngt_sd(__m128d a, __m128d b) { - // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 5) - // CHECK: shufflevector <{{.*}}, <2 x i32> return _mm_cmpngt_sd(a, b); } +// CHECK-LABEL: define dso_local <2 x double> @test_cmpnge_sd( +// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[B]], <2 x double> [[A]], i8 6) +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP0]], i32 0 +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[VECEXT_I]], i32 0 +// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x double> [[A]], i32 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[VECEXT1_I]], i32 1 +// CHECK-NEXT: ret <2 x double> [[VECINIT2_I]] +// __m128d test_cmpnge_sd(__m128d a, __m128d b) { - // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 6) - // CHECK: shufflevector <{{.*}}, <2 x i32> return _mm_cmpnge_sd(a, b); } diff --git a/clang/test/CodeGen/X86/avx-shuffle-builtins.c b/clang/test/CodeGen/X86/avx-shuffle-builtins.c index d184d28f3e07aa..1c05fa436983ed 100644 --- a/clang/test/CodeGen/X86/avx-shuffle-builtins.c +++ b/clang/test/CodeGen/X86/avx-shuffle-builtins.c @@ -1,7 +1,7 @@ +// NOTE: Assertions have been autogenerated by 
utils/update_cc_test_checks.py UTC_ARGS: --version 5 // REQUIRES: x86-registered-target -// RUN: %clang_cc1 -ffreestanding %s -O3 -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X64 -// RUN: %clang_cc1 -ffreestanding %s -O3 -triple=i386-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X86 -// FIXME: This is testing optimized generation of shuffle instructions and should be fixed. +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-- -target-feature +avx -disable-O0-optnone -emit-llvm -o - | opt -S -passes=mem2reg | FileCheck %s +// RUN: %clang_cc1 -ffreestanding %s -triple=i386-- -target-feature +avx -disable-O0-optnone -emit-llvm -o - | opt -S -passes=mem2reg | FileCheck %s #include @@ -10,201 +10,341 @@ // Test LLVM IR codegen of shuffle instructions, checking if the masks are correct // +// CHECK-LABEL: define dso_local <8 x float> @x( +// CHECK-SAME: <8 x float> noundef [[A:%.*]], <8 x float> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SHUFP:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> +// CHECK-NEXT: ret <8 x float> [[SHUFP]] +// __m256 x(__m256 a, __m256 b) { - // CHECK-LABEL: x - // CHECK: shufflevector{{.*}} return _mm256_shuffle_ps(a, b, 203); } +// CHECK-LABEL: define dso_local <2 x double> @test_mm_permute_pd( +// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[PERMIL:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> +// CHECK-NEXT: ret <2 x double> [[PERMIL]] +// __m128d test_mm_permute_pd(__m128d a) { - // CHECK-LABEL: test_mm_permute_pd - // CHECK: shufflevector{{.*}} return _mm_permute_pd(a, 1); } +// CHECK-LABEL: define dso_local <4 x double> @test_mm256_permute_pd( +// CHECK-SAME: <4 x double> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[PERMIL:%.*]] = shufflevector <4 
x double> [[A]], <4 x double> poison, <4 x i32> +// CHECK-NEXT: ret <4 x double> [[PERMIL]] +// __m256d test_mm256_permute_pd(__m256d a) { - // CHECK-LABEL: test_mm256_permute_pd - // CHECK: shufflevector{{.*}} return _mm256_permute_pd(a, 5); } +// CHECK-LABEL: define dso_local <4 x float> @test_mm_permute_ps( +// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[PERMIL:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +// CHECK-NEXT: ret <4 x float> [[PERMIL]] +// __m128 test_mm_permute_ps(__m128 a) { - // CHECK-LABEL: test_mm_permute_ps - // CHECK: shufflevector{{.*}} return _mm_permute_ps(a, 0x1b); } -// Test case for PR12401 +// CHECK-LABEL: define dso_local <4 x float> @test_mm_permute_ps2( +// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[PERMIL:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +// CHECK-NEXT: ret <4 x float> [[PERMIL]] +// __m128 test_mm_permute_ps2(__m128 a) { - // CHECK-LABEL: test_mm_permute_ps2 - // CHECK: shufflevector{{.*}} return _mm_permute_ps(a, 0xe6); } +// CHECK-LABEL: define dso_local <8 x float> @test_mm256_permute_ps( +// CHECK-SAME: <8 x float> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[PERMIL:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +// CHECK-NEXT: ret <8 x float> [[PERMIL]] +// __m256 test_mm256_permute_ps(__m256 a) { - // CHECK-LABEL: test_mm256_permute_ps - // CHECK: shufflevector{{.*}} return _mm256_permute_ps(a, 0x1b); } +// CHECK-LABEL: define dso_local <4 x double> @test_mm256_permute2f128_pd( +// CHECK-SAME: <4 x double> noundef [[A:%.*]], <4 x double> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VPERM:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> +// CHECK-NEXT: ret <4 x double> [[VPERM]] +// __m256d test_mm256_permute2f128_pd(__m256d a, 
__m256d b) { - // CHECK-LABEL: test_mm256_permute2f128_pd - // CHECK: shufflevector{{.*}} return _mm256_permute2f128_pd(a, b, 0x31); } +// CHECK-LABEL: define dso_local <8 x float> @test_mm256_permute2f128_ps( +// CHECK-SAME: <8 x float> noundef [[A:%.*]], <8 x float> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VPERM:%.*]] = shufflevector <8 x float> [[B]], <8 x float> [[A]], <8 x i32> +// CHECK-NEXT: ret <8 x float> [[VPERM]] +// __m256 test_mm256_permute2f128_ps(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_permute2f128_ps - // CHECK: shufflevector{{.*}} return _mm256_permute2f128_ps(a, b, 0x13); } +// CHECK-LABEL: define dso_local <4 x i64> @test_mm256_permute2f128_si256( +// CHECK-SAME: <4 x i64> noundef [[A:%.*]], <4 x i64> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32> +// CHECK-NEXT: [[VPERM:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[TMP1]], <8 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[VPERM]] to <4 x i64> +// CHECK-NEXT: ret <4 x i64> [[TMP2]] +// __m256i test_mm256_permute2f128_si256(__m256i a, __m256i b) { - // CHECK-LABEL: test_mm256_permute2f128_si256 - // CHECK: shufflevector{{.*}} return _mm256_permute2f128_si256(a, b, 0x20); } -__m128 -test_mm_broadcast_ss(float const *__a) { - // CHECK-LABEL: test_mm_broadcast_ss - // CHECK: insertelement <4 x float> {{.*}}, i64 0 - // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> poison, <4 x i32> zeroinitializer +// CHECK-LABEL: define dso_local <4 x float> @test_mm_broadcast_ss( +// CHECK-SAME: ptr noundef [[__A:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__A]], align 1 +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], 
float [[TMP0]], i32 1 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[TMP0]], i32 2 +// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <4 x float> [[VECINIT3_I]], float [[TMP0]], i32 3 +// CHECK-NEXT: ret <4 x float> [[VECINIT4_I]] +// +__m128 test_mm_broadcast_ss(float const *__a) { return _mm_broadcast_ss(__a); } -__m256d -test_mm256_broadcast_sd(double const *__a) { - // CHECK-LABEL: test_mm256_broadcast_sd - // CHECK: insertelement <4 x double> {{.*}}, i64 0 - // CHECK: shufflevector <4 x double> {{.*}}, <4 x double> poison, <4 x i32> zeroinitializer +// CHECK-LABEL: define dso_local <4 x double> @test_mm256_broadcast_sd( +// CHECK-SAME: ptr noundef [[__A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[__A]], align 1 +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x double> poison, double [[TMP0]], i32 0 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x double> [[VECINIT_I]], double [[TMP0]], i32 1 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x double> [[VECINIT2_I]], double [[TMP0]], i32 2 +// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <4 x double> [[VECINIT3_I]], double [[TMP0]], i32 3 +// CHECK-NEXT: ret <4 x double> [[VECINIT4_I]] +// +__m256d test_mm256_broadcast_sd(double const *__a) { return _mm256_broadcast_sd(__a); } -__m256 -test_mm256_broadcast_ss(float const *__a) { - // CHECK-LABEL: test_mm256_broadcast_ss - // CHECK: insertelement <8 x float> {{.*}}, i64 0 - // CHECK: shufflevector <8 x float> {{.*}}, <8 x float> poison, <8 x i32> zeroinitializer +// CHECK-LABEL: define dso_local <8 x float> @test_mm256_broadcast_ss( +// CHECK-SAME: ptr noundef [[__A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__A]], align 1 +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x float> poison, float [[TMP0]], i32 0 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x float> [[VECINIT_I]], float 
[[TMP0]], i32 1 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x float> [[VECINIT2_I]], float [[TMP0]], i32 2 +// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x float> [[VECINIT3_I]], float [[TMP0]], i32 3 +// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x float> [[VECINIT4_I]], float [[TMP0]], i32 4 +// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x float> [[VECINIT5_I]], float [[TMP0]], i32 5 +// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x float> [[VECINIT6_I]], float [[TMP0]], i32 6 +// CHECK-NEXT: [[VECINIT8_I:%.*]] = insertelement <8 x float> [[VECINIT7_I]], float [[TMP0]], i32 7 +// CHECK-NEXT: ret <8 x float> [[VECINIT8_I]] +// +__m256 test_mm256_broadcast_ss(float const *__a) { return _mm256_broadcast_ss(__a); } // Make sure we have the correct mask for each insertf128 case. +// CHECK-LABEL: define dso_local <8 x float> @test_mm256_insertf128_ps_0( +// CHECK-SAME: <8 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <8 x i32> +// CHECK-NEXT: [[INSERT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[WIDEN]], <8 x i32> +// CHECK-NEXT: ret <8 x float> [[INSERT]] +// __m256 test_mm256_insertf128_ps_0(__m256 a, __m128 b) { - // CHECK-LABEL: test_mm256_insertf128_ps_0 - // CHECK: shufflevector{{.*}} return _mm256_insertf128_ps(a, b, 0); } +// CHECK-LABEL: define dso_local <4 x double> @test_mm256_insertf128_pd_0( +// CHECK-SAME: <4 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> +// CHECK-NEXT: [[INSERT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[WIDEN]], <4 x i32> +// CHECK-NEXT: ret <4 x double> [[INSERT]] +// __m256d test_mm256_insertf128_pd_0(__m256d a, __m128d b) { - // CHECK-LABEL: test_mm256_insertf128_pd_0 - // CHECK: 
shufflevector{{.*}} return _mm256_insertf128_pd(a, b, 0); } +// CHECK-LABEL: define dso_local <4 x i64> @test_mm256_insertf128_si256_0( +// CHECK-SAME: <4 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <4 x i32> +// CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> +// CHECK-NEXT: [[INSERT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[WIDEN]], <8 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[INSERT]] to <4 x i64> +// CHECK-NEXT: ret <4 x i64> [[TMP2]] +// __m256i test_mm256_insertf128_si256_0(__m256i a, __m128i b) { - // CHECK-LABEL: test_mm256_insertf128_si256_0 - // X64: shufflevector{{.*}} - // X86: shufflevector{{.*}} return _mm256_insertf128_si256(a, b, 0); } +// CHECK-LABEL: define dso_local <8 x float> @test_mm256_insertf128_ps_1( +// CHECK-SAME: <8 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <8 x i32> +// CHECK-NEXT: [[INSERT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[WIDEN]], <8 x i32> +// CHECK-NEXT: ret <8 x float> [[INSERT]] +// __m256 test_mm256_insertf128_ps_1(__m256 a, __m128 b) { - // CHECK-LABEL: test_mm256_insertf128_ps_1 - // CHECK: shufflevector{{.*}} return _mm256_insertf128_ps(a, b, 1); } +// CHECK-LABEL: define dso_local <4 x double> @test_mm256_insertf128_pd_1( +// CHECK-SAME: <4 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> +// CHECK-NEXT: [[INSERT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[WIDEN]], <4 x i32> +// CHECK-NEXT: ret <4 x double> [[INSERT]] +// __m256d 
test_mm256_insertf128_pd_1(__m256d a, __m128d b) { - // CHECK-LABEL: test_mm256_insertf128_pd_1 - // CHECK: shufflevector{{.*}} return _mm256_insertf128_pd(a, b, 1); } +// CHECK-LABEL: define dso_local <4 x i64> @test_mm256_insertf128_si256_1( +// CHECK-SAME: <4 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <4 x i32> +// CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> +// CHECK-NEXT: [[INSERT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[WIDEN]], <8 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[INSERT]] to <4 x i64> +// CHECK-NEXT: ret <4 x i64> [[TMP2]] +// __m256i test_mm256_insertf128_si256_1(__m256i a, __m128i b) { - // CHECK-LABEL: test_mm256_insertf128_si256_1 - // X64: shufflevector{{.*}} - // X86: shufflevector{{.*}} return _mm256_insertf128_si256(a, b, 1); } // Make sure we have the correct mask for each extractf128 case. 
+// CHECK-LABEL: define dso_local <4 x float> @test_mm256_extractf128_ps_0( +// CHECK-SAME: <8 x float> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +// CHECK-NEXT: ret <4 x float> [[EXTRACT]] +// __m128 test_mm256_extractf128_ps_0(__m256 a) { - // X64-LABEL: test_mm256_extractf128_ps_0 - // X64: shufflevector{{.*}} - // - // X86-LABEL: test_mm256_extractf128_ps_0 - // X86: shufflevector{{.*}} return _mm256_extractf128_ps(a, 0); } +// CHECK-LABEL: define dso_local <2 x double> @test_mm256_extractf128_pd_0( +// CHECK-SAME: <4 x double> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <2 x i32> +// CHECK-NEXT: ret <2 x double> [[EXTRACT]] +// __m128d test_mm256_extractf128_pd_0(__m256d a) { - // CHECK-LABEL: test_mm256_extractf128_pd_0 - // CHECK: shufflevector{{.*}} return _mm256_extractf128_pd(a, 0); } +// CHECK-LABEL: define dso_local <2 x i64> @test_mm256_extractf128_si256_0( +// CHECK-SAME: <4 x i64> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> +// CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[EXTRACT]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[TMP1]] +// __m128i test_mm256_extractf128_si256_0(__m256i a) { - // CHECK-LABEL: test_mm256_extractf128_si256_0 - // CHECK: shufflevector{{.*}} return _mm256_extractf128_si256(a, 0); } +// CHECK-LABEL: define dso_local <4 x float> @test_mm256_extractf128_ps_1( +// CHECK-SAME: <8 x float> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +// CHECK-NEXT: ret <4 x float> [[EXTRACT]] +// __m128 
test_mm256_extractf128_ps_1(__m256 a) { - // X64-LABEL: test_mm256_extractf128_ps_1 - // X64: shufflevector{{.*}} - // - // X86-LABEL: test_mm256_extractf128_ps_1 - // X86: shufflevector{{.*}} return _mm256_extractf128_ps(a, 1); } +// CHECK-LABEL: define dso_local <2 x double> @test_mm256_extractf128_pd_1( +// CHECK-SAME: <4 x double> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <2 x i32> +// CHECK-NEXT: ret <2 x double> [[EXTRACT]] +// __m128d test_mm256_extractf128_pd_1(__m256d a) { - // CHECK-LABEL: test_mm256_extractf128_pd_1 - // CHECK: shufflevector{{.*}} return _mm256_extractf128_pd(a, 1); } +// CHECK-LABEL: define dso_local <2 x i64> @test_mm256_extractf128_si256_1( +// CHECK-SAME: <4 x i64> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> +// CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[EXTRACT]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[TMP1]] +// __m128i test_mm256_extractf128_si256_1(__m256i a) { - // CHECK-LABEL: test_mm256_extractf128_si256_1 - // CHECK: shufflevector{{.*}} return _mm256_extractf128_si256(a, 1); } +// CHECK-LABEL: define dso_local <8 x float> @test_mm256_set_m128( +// CHECK-SAME: <4 x float> noundef [[HI:%.*]], <4 x float> noundef [[LO:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[LO]], <4 x float> [[HI]], <8 x i32> +// CHECK-NEXT: ret <8 x float> [[SHUFFLE_I]] +// __m256 test_mm256_set_m128(__m128 hi, __m128 lo) { - // CHECK-LABEL: test_mm256_set_m128 - // CHECK: shufflevector{{.*}} return _mm256_set_m128(hi, lo); } +// CHECK-LABEL: define dso_local <4 x double> @test_mm256_set_m128d( +// CHECK-SAME: <2 x double> noundef [[HI:%.*]], <2 x double> noundef [[LO:%.*]]) #[[ATTR0]] { +// 
CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[LO]], <2 x double> [[HI]], <4 x i32> +// CHECK-NEXT: ret <4 x double> [[SHUFFLE_I]] +// __m256d test_mm256_set_m128d(__m128d hi, __m128d lo) { - // CHECK-LABEL: test_mm256_set_m128d - // CHECK: shufflevector{{.*}} return _mm256_set_m128d(hi, lo); } +// CHECK-LABEL: define dso_local <4 x i64> @test_mm256_set_m128i( +// CHECK-SAME: <2 x i64> noundef [[HI:%.*]], <2 x i64> noundef [[LO:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[LO]], <2 x i64> [[HI]], <4 x i32> +// CHECK-NEXT: ret <4 x i64> [[SHUFFLE_I]] +// __m256i test_mm256_set_m128i(__m128i hi, __m128i lo) { - // CHECK-LABEL: test_mm256_set_m128i - // CHECK: shufflevector{{.*}} return _mm256_set_m128i(hi, lo); } +// CHECK-LABEL: define dso_local <8 x float> @test_mm256_setr_m128( +// CHECK-SAME: <4 x float> noundef [[HI:%.*]], <4 x float> noundef [[LO:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x float> [[LO]], <4 x float> [[HI]], <8 x i32> +// CHECK-NEXT: ret <8 x float> [[SHUFFLE_I_I]] +// __m256 test_mm256_setr_m128(__m128 hi, __m128 lo) { - // CHECK-LABEL: test_mm256_setr_m128 - // CHECK: shufflevector{{.*}} return _mm256_setr_m128(lo, hi); } +// CHECK-LABEL: define dso_local <4 x double> @test_mm256_setr_m128d( +// CHECK-SAME: <2 x double> noundef [[HI:%.*]], <2 x double> noundef [[LO:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x double> [[LO]], <2 x double> [[HI]], <4 x i32> +// CHECK-NEXT: ret <4 x double> [[SHUFFLE_I_I]] +// __m256d test_mm256_setr_m128d(__m128d hi, __m128d lo) { - // CHECK-LABEL: test_mm256_setr_m128d - // CHECK: shufflevector{{.*}} return _mm256_setr_m128d(lo, hi); } +// CHECK-LABEL: define dso_local <4 x i64> @test_mm256_setr_m128i( +// CHECK-SAME: <2 x i64> noundef [[HI:%.*]], <2 x i64> noundef [[LO:%.*]]) 
#[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i64> [[LO]], <2 x i64> [[HI]], <4 x i32> +// CHECK-NEXT: ret <4 x i64> [[SHUFFLE_I_I]] +// __m256i test_mm256_setr_m128i(__m128i hi, __m128i lo) { - // CHECK-LABEL: test_mm256_setr_m128i - // CHECK: shufflevector{{.*}} return _mm256_setr_m128i(lo, hi); } diff --git a/clang/test/CodeGen/X86/sse.c b/clang/test/CodeGen/X86/sse.c index a75b8dc77e86e1..017bdd7846fa39 100644 --- a/clang/test/CodeGen/X86/sse.c +++ b/clang/test/CodeGen/X86/sse.c @@ -1,42 +1,72 @@ -// RUN: %clang_cc1 -ffreestanding -O3 -triple x86_64-apple-macosx10.8.0 -target-feature +sse4.1 -emit-llvm %s -o - | FileCheck %s -// FIXME: This test currently depends on optimization - it should be rewritten to avoid it. +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -ffreestanding -triple x86_64-- -target-feature +sse4.1 -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s #include // Byte-shifts look reversed due to xmm register layout +// CHECK-LABEL: define dso_local <2 x i64> @test_mm_slli_si128( +// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8> +// CHECK-NEXT: [[PSLLDQ:%.*]] = shufflevector <16 x i8> zeroinitializer, <16 x i8> [[CAST]], <16 x i32> +// CHECK-NEXT: [[CAST1:%.*]] = bitcast <16 x i8> [[PSLLDQ]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[CAST1]] +// __m128i test_mm_slli_si128(__m128i a) { - // CHECK-LABEL: @test_mm_slli_si128 - // CHECK: shufflevector <16 x i8> <{{.*}}, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> {{.*}}, <16 x i32> return _mm_slli_si128(a, 5); } +// CHECK-LABEL: define dso_local <2 x i64> @test_mm_slli_si128_0( +// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8> +// 
CHECK-NEXT: [[PSLLDQ:%.*]] = shufflevector <16 x i8> zeroinitializer, <16 x i8> [[CAST]], <16 x i32> +// CHECK-NEXT: [[CAST1:%.*]] = bitcast <16 x i8> [[PSLLDQ]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[CAST1]] +// __m128i test_mm_slli_si128_0(__m128i a) { - // CHECK-LABEL: @test_mm_slli_si128_0 - // CHECK-NOT: shufflevector return _mm_slli_si128(a, 0); } +// CHECK-LABEL: define dso_local <2 x i64> @test_mm_slli_si128_16( +// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret <2 x i64> zeroinitializer +// __m128i test_mm_slli_si128_16(__m128i a) { - // CHECK-LABEL: @test_mm_slli_si128_16 - // CHECK-NOT: shufflevector return _mm_slli_si128(a, 16); } +// CHECK-LABEL: define dso_local <2 x i64> @test_mm_srli_si128( +// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8> +// CHECK-NEXT: [[PSRLDQ:%.*]] = shufflevector <16 x i8> [[CAST]], <16 x i8> zeroinitializer, <16 x i32> +// CHECK-NEXT: [[CAST1:%.*]] = bitcast <16 x i8> [[PSRLDQ]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[CAST1]] +// __m128i test_mm_srli_si128(__m128i a) { - // CHECK-LABEL: @test_mm_srli_si128 - // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> , <16 x i32> return _mm_srli_si128(a, 5); } +// CHECK-LABEL: define dso_local <2 x i64> @test_mm_srli_si128_0( +// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8> +// CHECK-NEXT: [[PSRLDQ:%.*]] = shufflevector <16 x i8> [[CAST]], <16 x i8> zeroinitializer, <16 x i32> +// CHECK-NEXT: [[CAST1:%.*]] = bitcast <16 x i8> [[PSRLDQ]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[CAST1]] +// __m128i test_mm_srli_si128_0(__m128i a) { - // CHECK-LABEL: @test_mm_srli_si128_0 - // CHECK-NOT: shufflevector return _mm_srli_si128(a, 0); } +// CHECK-LABEL: define dso_local <2 x i64> @test_mm_srli_si128_16( +// 
CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret <2 x i64> zeroinitializer +// __m128i test_mm_srli_si128_16(__m128i a) { - // CHECK-LABEL: @test_mm_srli_si128_16 - // CHECK-NOT: shufflevector return _mm_srli_si128(a, 16); }