From d33ba1aa0b505e3f4c55b382f171e8cbef6a1843 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sun, 31 May 2020 13:19:18 +0100
Subject: [PATCH 1/6] [X86][AVX] getFauxShuffleMask - don't widen shuffle
 inputs from INSERT_SUBVECTOR(X,SHUFFLE(Y,Z))

Don't create nodes on the fly when decoding INSERT_SUBVECTOR as faux
shuffles.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bcaf6298de3322..95c9312cd772ce 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7438,9 +7438,9 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
                             SubMask, DAG, Depth + 1, ResolveKnownElts))
       return false;
 
-    // Shuffle inputs must be the same size as the subvector.
+    // Subvector shuffle inputs must not be larger than the subvector.
     if (llvm::any_of(SubInputs, [SubVT](SDValue Op) {
-          return SubVT.getSizeInBits() != Op.getValueSizeInBits();
+          return SubVT.getSizeInBits() > Op.getValueSizeInBits();
         }))
       return false;
 
@@ -7460,14 +7460,7 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
       }
     }
     Ops.push_back(Src);
-    for (SDValue &SubInput : SubInputs) {
-      EVT SubSVT = SubInput.getValueType().getScalarType();
-      EVT AltVT = EVT::getVectorVT(*DAG.getContext(), SubSVT,
-                                   NumSizeInBits / SubSVT.getSizeInBits());
-      Ops.push_back(DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), AltVT,
-                                DAG.getUNDEF(AltVT), SubInput,
-                                DAG.getIntPtrConstant(0, SDLoc(N))));
-    }
+    Ops.append(SubInputs.begin(), SubInputs.end());
     for (int i = 0; i != (int)NumElts; ++i)
       Mask.push_back(i);
     for (int i = 0; i != (int)NumSubElts; ++i) {

From 129c501aa9199c2c5a69c7a6de8ec9873e3d41a4 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Sun, 31 May 2020 08:41:09 -0400
Subject: [PATCH 2/6] [PhaseOrdering] add scalarization test for PR42174; NFC

Motivating test for vector-combine enhancement in D80885.
Make sure that vectorization and canonicalization are working together
as expected.
---
 .../PhaseOrdering/X86/scalarization.ll        | 78 +++++++++++++++++++
 1 file changed, 78 insertions(+)
 create mode 100644 llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll

diff --git a/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll b/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll
new file mode 100644
index 00000000000000..3b341f6a5b7a55
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -O3 -S < %s | FileCheck %s
+; RUN: opt -passes='default<O3>' -S < %s | FileCheck %s
+
+target triple = "x86_64--"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; PR42174 - https://bugs.llvm.org/show_bug.cgi?id=42174
+; This test should match the IR produced by clang after running -mem2reg.
+; All math before the final 'add' should be scalarized.
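+; (Concretely: each splat operand should fold to a single insertelement,
+; the adds should operate on those lane-0-only vectors, and one splat
+; shufflevector should remain before the final add of %num; see the
+; CHECK lines below.)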
+ +define <4 x i32> @square(<4 x i32> %num, i32 %y, i32 %x, i32 %h, i32 %k, i32 %w, i32 %p, i32 %j, i32 %u) { +; CHECK-LABEL: @square( +; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[K:%.*]], 2 +; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[DIV]], i32 0 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[P:%.*]], 6234 +; CHECK-NEXT: [[SPLATINSERT2:%.*]] = insertelement <4 x i32> undef, i32 [[MUL]], i32 0 +; CHECK-NEXT: [[MUL5:%.*]] = mul nsw i32 [[H:%.*]], 75 +; CHECK-NEXT: [[SPLATINSERT6:%.*]] = insertelement <4 x i32> undef, i32 [[MUL5]], i32 0 +; CHECK-NEXT: [[DIV9:%.*]] = sdiv i32 [[J:%.*]], 3452 +; CHECK-NEXT: [[SPLATINSERT10:%.*]] = insertelement <4 x i32> undef, i32 [[DIV9]], i32 0 +; CHECK-NEXT: [[MUL13:%.*]] = mul nsw i32 [[W:%.*]], 53 +; CHECK-NEXT: [[SPLATINSERT14:%.*]] = insertelement <4 x i32> undef, i32 [[MUL13]], i32 0 +; CHECK-NEXT: [[DIV17:%.*]] = sdiv i32 [[X:%.*]], 820 +; CHECK-NEXT: [[SPLATINSERT18:%.*]] = insertelement <4 x i32> undef, i32 [[DIV17]], i32 0 +; CHECK-NEXT: [[MUL21:%.*]] = shl nsw i32 [[U:%.*]], 2 +; CHECK-NEXT: [[SPLATINSERT22:%.*]] = insertelement <4 x i32> undef, i32 [[MUL21]], i32 0 +; CHECK-NEXT: [[SPLATINSERT25:%.*]] = insertelement <4 x i32> undef, i32 [[Y:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SPLATINSERT25]], +; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[SPLATINSERT18]] +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[SPLATINSERT6]] +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[SPLATINSERT]] +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[SPLATINSERT14]] +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[SPLATINSERT2]] +; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[TMP6]], [[SPLATINSERT10]] +; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], [[SPLATINSERT22]] +; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP8]], +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[ADD29:%.*]] = add <4 x i32> [[TMP10]], [[NUM:%.*]] +; CHECK-NEXT: ret <4 x i32> [[ADD29]] +; + %add = add <4 x i32> %num, + %div = sdiv i32 %k, 2 + %splatinsert = insertelement <4 x i32> undef, i32 %div, i32 0 + %splat = shufflevector <4 x i32> %splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + %add1 = add <4 x i32> %add, %splat + %mul = mul nsw i32 %p, 6234 + %splatinsert2 = insertelement <4 x i32> undef, i32 %mul, i32 0 + %splat3 = shufflevector <4 x i32> %splatinsert2, <4 x i32> undef, <4 x i32> zeroinitializer + %add4 = add <4 x i32> %add1, %splat3 + %mul5 = mul nsw i32 75, %h + %splatinsert6 = insertelement <4 x i32> undef, i32 %mul5, i32 0 + %splat7 = shufflevector <4 x i32> %splatinsert6, <4 x i32> undef, <4 x i32> zeroinitializer + %add8 = add <4 x i32> %add4, %splat7 + %div9 = sdiv i32 %j, 3452 + %splatinsert10 = insertelement <4 x i32> undef, i32 %div9, i32 0 + %splat11 = shufflevector <4 x i32> %splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer + %add12 = add <4 x i32> %add8, %splat11 + %mul13 = mul nsw i32 53, %w + %splatinsert14 = insertelement <4 x i32> undef, i32 %mul13, i32 0 + %splat15 = shufflevector <4 x i32> %splatinsert14, <4 x i32> undef, <4 x i32> zeroinitializer + %add16 = add <4 x i32> %add12, %splat15 + %div17 = sdiv i32 %x, 820 + %splatinsert18 = insertelement <4 x i32> undef, i32 %div17, i32 0 + %splat19 = shufflevector <4 x i32> %splatinsert18, <4 x i32> undef, <4 x i32> zeroinitializer + %add20 = add <4 x i32> %add16, %splat19 + %mul21 = mul nsw i32 4, %u + %splatinsert22 = insertelement <4 x i32> 
undef, i32 %mul21, i32 0 + %splat23 = shufflevector <4 x i32> %splatinsert22, <4 x i32> undef, <4 x i32> zeroinitializer + %add24 = add <4 x i32> %add20, %splat23 + %splatinsert25 = insertelement <4 x i32> undef, i32 %y, i32 0 + %splat26 = shufflevector <4 x i32> %splatinsert25, <4 x i32> undef, <4 x i32> zeroinitializer + %add27 = add <4 x i32> %add24, %splat26 + %add28 = add <4 x i32> %add27, + %add29 = add <4 x i32> %add28, + ret <4 x i32> %add29 +} + From 15b281d7805dde85af532b954e27e3fc8bf2611d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 31 May 2020 13:46:46 +0100 Subject: [PATCH 3/6] [X86][AVX] Add test case described in D79987 --- llvm/test/CodeGen/X86/oddshuffles.ll | 69 ++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/llvm/test/CodeGen/X86/oddshuffles.ll b/llvm/test/CodeGen/X86/oddshuffles.ll index 03f6b526651691..e182008eadc9ea 100644 --- a/llvm/test/CodeGen/X86/oddshuffles.ll +++ b/llvm/test/CodeGen/X86/oddshuffles.ll @@ -1977,6 +1977,75 @@ define void @splat3_256(<32 x i8> %a0, <96 x i8> *%a1) { ret void } +; D79987 +define <16 x i32> @splat_v3i32(<3 x i32>* %ptr) { +; SSE2-LABEL: splat_v3i32: +; SSE2: # %bb.0: +; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE2-NEXT: xorps %xmm1, %xmm1 +; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,0,1] +; SSE2-NEXT: andps {{.*}}(%rip), %xmm0 +; SSE2-NEXT: xorps %xmm1, %xmm1 +; SSE2-NEXT: xorps %xmm3, %xmm3 +; SSE2-NEXT: retq +; +; SSE42-LABEL: splat_v3i32: +; SSE42: # %bb.0: +; SSE42-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; SSE42-NEXT: pxor %xmm1, %xmm1 +; SSE42-NEXT: pxor %xmm2, %xmm2 +; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1],xmm2[2,3,4,5,6,7] +; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7] +; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,0,1] +; SSE42-NEXT: pxor %xmm1, %xmm1 +; SSE42-NEXT: xorps %xmm3, %xmm3 +; SSE42-NEXT: retq +; +; AVX1-LABEL: splat_v3i32: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero +; AVX1-NEXT: vpinsrd $2, 8(%rdi), %xmm0, %xmm1 +; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2,3,4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] +; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1],ymm1[2],ymm2[3,4,5,6,7] +; AVX1-NEXT: retq +; +; AVX2-SLOW-LABEL: splat_v3i32: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero +; AVX2-SLOW-NEXT: vpinsrd $2, 8(%rdi), %xmm0, %xmm1 +; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpbroadcastd %xmm1, %ymm1 +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1],ymm1[2],ymm2[3,4,5,6,7] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: splat_v3i32: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero +; AVX2-FAST-NEXT: vpinsrd $2, 8(%rdi), %xmm0, %xmm1 +; AVX2-FAST-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3,4,5,6,7] +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX2-FAST-NEXT: retq +; +; XOP-LABEL: splat_v3i32: +; XOP: # %bb.0: +; XOP-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero +; XOP-NEXT: vpinsrd $2, 8(%rdi), %xmm0, %xmm1 +; XOP-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; XOP-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2,3,4,5,6,7] +; XOP-NEXT: 
vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; XOP-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1],ymm1[2],ymm2[3,4,5,6,7]
+; XOP-NEXT:    retq
+  %1 = load <3 x i32>, <3 x i32>* %ptr, align 1
+  %2 = shufflevector <3 x i32> %1, <3 x i32> undef, <16 x i32>
+  %3 = shufflevector <16 x i32> , <16 x i32> %2, <16 x i32>
+  ret <16 x i32> %3
+}
+
 define <2 x double> @wrongorder(<4 x double> %A, <8 x double>* %P) #0 {
 ; SSE2-LABEL: wrongorder:
 ; SSE2: # %bb.0:

From f046326847076b50017b3d32db62c3511c478888 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sun, 31 May 2020 13:50:40 +0100
Subject: [PATCH 4/6] [X86] getFauxShuffleMask/getTargetShuffleInputs - make
 SelectionDAG const (PR45974).

Try to prevent future node creation issues (as detailed in PR45974) by
making the SelectionDAG reference const, so it can still be used for
analysis, but not node creation.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 95c9312cd772ce..89559ad9acbda3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7271,7 +7271,7 @@ static void resolveZeroablesFromTargetShuffle(const SmallVectorImpl<int> &Mask,
 // TODO: Use DemandedElts variant.
 static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl<SDValue> &Inputs,
                                    SmallVectorImpl<int> &Mask,
-                                   SelectionDAG &DAG, unsigned Depth,
+                                   const SelectionDAG &DAG, unsigned Depth,
                                    bool ResolveKnownElts);
 
 // Attempt to decode ops that could be represented as a shuffle mask.
@@ -7280,7 +7280,7 @@ static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl<SDValue> &Inputs,
 static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
                                SmallVectorImpl<int> &Mask,
                                SmallVectorImpl<SDValue> &Ops,
-                               SelectionDAG &DAG, unsigned Depth,
+                               const SelectionDAG &DAG, unsigned Depth,
                                bool ResolveKnownElts) {
   Mask.clear();
   Ops.clear();
@@ -7734,7 +7734,7 @@ static bool getTargetShuffleInputs(SDValue Op, const APInt &DemandedElts,
                                    SmallVectorImpl<SDValue> &Inputs,
                                    SmallVectorImpl<int> &Mask,
                                    APInt &KnownUndef, APInt &KnownZero,
-                                   SelectionDAG &DAG, unsigned Depth,
+                                   const SelectionDAG &DAG, unsigned Depth,
                                    bool ResolveKnownElts) {
   EVT VT = Op.getValueType();
   if (!VT.isSimple() || !VT.isVector())
@@ -7755,7 +7755,7 @@ static bool getTargetShuffleInputs(SDValue Op, const APInt &DemandedElts,
 
 static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl<SDValue> &Inputs,
                                    SmallVectorImpl<int> &Mask,
-                                   SelectionDAG &DAG, unsigned Depth = 0,
+                                   const SelectionDAG &DAG, unsigned Depth = 0,
                                    bool ResolveKnownElts = true) {
   EVT VT = Op.getValueType();
   if (!VT.isSimple() || !VT.isVector())

From e31f2a894a7bec0a64553d615ef40fa36134844e Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Sun, 31 May 2020 09:05:48 -0400
Subject: [PATCH 5/6] [VectorCombine] add tests for scalarizing
 binop-with-constant; NFC

Goes with proposal in D80885.

This is adapted from the InstCombine tests that were added for D50992.
But these should be adjusted further to provide more interesting
scenarios for x86-specific codegen. E.g., vector types/sizes will have
different costs depending on ISA attributes.

We also need to add tests that include a load of the scalar variable
and add tests that include extra uses of the insert to further
exercise the cost model.
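For context, the rewrite proposed in D80885 has roughly this shape
(a sketch only; the constant vector and value names here are
illustrative, not taken from the tests below):

  ; binop of a constant with one inserted scalar lane ...
  %ins = insertelement <2 x i64> undef, i64 %x, i32 0
  %bo = add <2 x i64> %ins, <i64 42, i64 undef>

  ; ... becomes scalar math plus a single insert, when the target
  ; cost model says the scalar op is cheaper:
  %s = add i64 %x, 42
  %bo = insertelement <2 x i64> undef, i64 %s, i32 0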
--- .../X86/insert-binop-with-constant.ll | 643 ++++++++++++++++++ 1 file changed, 643 insertions(+) create mode 100644 llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll new file mode 100644 index 00000000000000..7b8dc44ebc2434 --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll @@ -0,0 +1,643 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX + +define <2 x i64> @add_constant(i64 %x) { +; CHECK-LABEL: @add_constant( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = add <2 x i64> [[INS]], +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 0 + %bo = add <2 x i64> %ins, + ret <2 x i64> %bo +} + +define <2 x i64> @add_constant_not_undef_lane(i64 %x) { +; CHECK-LABEL: @add_constant_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = add <2 x i64> [[INS]], +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 0 + %bo = add <2 x i64> %ins, + ret <2 x i64> %bo +} + +; IR flags are not required, but they should propagate. + +define <4 x i32> @sub_constant_op0(i32 %x) { +; CHECK-LABEL: @sub_constant_op0( +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 1 +; CHECK-NEXT: [[BO:%.*]] = sub nuw nsw <4 x i32> , [[INS]] +; CHECK-NEXT: ret <4 x i32> [[BO]] +; + %ins = insertelement <4 x i32> undef, i32 %x, i32 1 + %bo = sub nsw nuw <4 x i32> , %ins + ret <4 x i32> %bo +} + +define <4 x i32> @sub_constant_op0_not_undef_lane(i32 %x) { +; CHECK-LABEL: @sub_constant_op0_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 1 +; CHECK-NEXT: [[BO:%.*]] = sub nuw <4 x i32> , [[INS]] +; CHECK-NEXT: ret <4 x i32> [[BO]] +; + %ins = insertelement <4 x i32> undef, i32 %x, i32 1 + %bo = sub nuw <4 x i32> , %ins + ret <4 x i32> %bo +} + +define <8 x i16> @sub_constant_op1(i16 %x) { +; CHECK-LABEL: @sub_constant_op1( +; CHECK-NEXT: [[INS:%.*]] = insertelement <8 x i16> undef, i16 [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = sub nuw <8 x i16> [[INS]], +; CHECK-NEXT: ret <8 x i16> [[BO]] +; + %ins = insertelement <8 x i16> undef, i16 %x, i32 0 + %bo = sub nuw <8 x i16> %ins, + ret <8 x i16> %bo +} + +define <8 x i16> @sub_constant_op1_not_undef_lane(i16 %x) { +; CHECK-LABEL: @sub_constant_op1_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <8 x i16> undef, i16 [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = sub nuw <8 x i16> [[INS]], +; CHECK-NEXT: ret <8 x i16> [[BO]] +; + %ins = insertelement <8 x i16> undef, i16 %x, i32 0 + %bo = sub nuw <8 x i16> %ins, + ret <8 x i16> %bo +} + +define <16 x i8> @mul_constant(i8 %x) { +; CHECK-LABEL: @mul_constant( +; CHECK-NEXT: [[INS:%.*]] = insertelement <16 x i8> undef, i8 [[X:%.*]], i32 2 +; CHECK-NEXT: [[BO:%.*]] = mul <16 x i8> [[INS]], +; CHECK-NEXT: ret <16 x i8> [[BO]] +; + %ins = insertelement <16 x i8> undef, i8 %x, i32 2 + %bo = mul <16 x i8> %ins, + ret <16 x i8> %bo +} + +define <3 x i64> @mul_constant_not_undef_lane(i64 %x) { +; CHECK-LABEL: 
@mul_constant_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <3 x i64> undef, i64 [[X:%.*]], i32 2 +; CHECK-NEXT: [[BO:%.*]] = mul <3 x i64> [[INS]], +; CHECK-NEXT: ret <3 x i64> [[BO]] +; + %ins = insertelement <3 x i64> undef, i64 %x, i32 2 + %bo = mul <3 x i64> %ins, + ret <3 x i64> %bo +} + +define <2 x i64> @shl_constant_op0(i64 %x) { +; CHECK-LABEL: @shl_constant_op0( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 +; CHECK-NEXT: [[BO:%.*]] = shl <2 x i64> , [[INS]] +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 1 + %bo = shl <2 x i64> , %ins + ret <2 x i64> %bo +} + +define <2 x i64> @shl_constant_op0_not_undef_lane(i64 %x) { +; CHECK-LABEL: @shl_constant_op0_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 +; CHECK-NEXT: [[BO:%.*]] = shl <2 x i64> , [[INS]] +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 1 + %bo = shl <2 x i64> , %ins + ret <2 x i64> %bo +} + +define <2 x i64> @shl_constant_op1(i64 %x) { +; CHECK-LABEL: @shl_constant_op1( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = shl nuw <2 x i64> [[INS]], +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 0 + %bo = shl nuw <2 x i64> %ins, + ret <2 x i64> %bo +} + +define <2 x i64> @shl_constant_op1_not_undef_lane(i64 %x) { +; CHECK-LABEL: @shl_constant_op1_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = shl nuw <2 x i64> [[INS]], +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 0 + %bo = shl nuw <2 x i64> %ins, + ret <2 x i64> %bo +} + +define <2 x i64> @ashr_constant_op0(i64 %x) { +; CHECK-LABEL: @ashr_constant_op0( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 +; CHECK-NEXT: [[BO:%.*]] = ashr exact <2 x i64> , [[INS]] +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 1 + %bo = ashr exact <2 x i64> , %ins + ret <2 x i64> %bo +} + +define <2 x i64> @ashr_constant_op0_not_undef_lane(i64 %x) { +; CHECK-LABEL: @ashr_constant_op0_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 +; CHECK-NEXT: [[BO:%.*]] = ashr exact <2 x i64> , [[INS]] +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 1 + %bo = ashr exact <2 x i64> , %ins + ret <2 x i64> %bo +} + +define <2 x i64> @ashr_constant_op1(i64 %x) { +; CHECK-LABEL: @ashr_constant_op1( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = ashr <2 x i64> [[INS]], +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 0 + %bo = ashr <2 x i64> %ins, + ret <2 x i64> %bo +} + +define <2 x i64> @ashr_constant_op1_not_undef_lane(i64 %x) { +; CHECK-LABEL: @ashr_constant_op1_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = ashr <2 x i64> [[INS]], +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 0 + %bo = ashr <2 x i64> %ins, + ret <2 x i64> %bo +} + +define <2 x i64> @lshr_constant_op0(i64 %x) { +; CHECK-LABEL: @lshr_constant_op0( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] 
= lshr <2 x i64> , [[INS]] +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 0 + %bo = lshr <2 x i64> , %ins + ret <2 x i64> %bo +} + +define <2 x i64> @lshr_constant_op0_not_undef_lane(i64 %x) { +; CHECK-LABEL: @lshr_constant_op0_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = lshr <2 x i64> , [[INS]] +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 0 + %bo = lshr <2 x i64> , %ins + ret <2 x i64> %bo +} + +define <2 x i64> @lshr_constant_op1(i64 %x) { +; CHECK-LABEL: @lshr_constant_op1( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 +; CHECK-NEXT: [[BO:%.*]] = lshr exact <2 x i64> [[INS]], +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 1 + %bo = lshr exact <2 x i64> %ins, + ret <2 x i64> %bo +} + +define <2 x i64> @lshr_constant_op1_not_undef_lane(i64 %x) { +; CHECK-LABEL: @lshr_constant_op1_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 +; CHECK-NEXT: [[BO:%.*]] = lshr exact <2 x i64> [[INS]], +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 1 + %bo = lshr exact <2 x i64> %ins, + ret <2 x i64> %bo +} + +define <2 x i64> @urem_constant_op0(i64 %x) { +; CHECK-LABEL: @urem_constant_op0( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = urem <2 x i64> , [[INS]] +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 0 + %bo = urem <2 x i64> , %ins + ret <2 x i64> %bo +} + +define <2 x i64> @urem_constant_op0_not_undef_lane(i64 %x) { +; CHECK-LABEL: @urem_constant_op0_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = urem <2 x i64> , [[INS]] +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 0 + %bo = urem <2 x i64> , %ins + ret <2 x i64> %bo +} + +define <2 x i64> @urem_constant_op1(i64 %x) { +; CHECK-LABEL: @urem_constant_op1( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 +; CHECK-NEXT: [[BO:%.*]] = urem <2 x i64> [[INS]], +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 1 + %bo = urem <2 x i64> %ins, + ret <2 x i64> %bo +} + +define <2 x i64> @urem_constant_op1_not_undef_lane(i64 %x) { +; CHECK-LABEL: @urem_constant_op1_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 +; CHECK-NEXT: [[BO:%.*]] = urem <2 x i64> [[INS]], +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 1 + %bo = urem <2 x i64> %ins, + ret <2 x i64> %bo +} + +define <2 x i64> @srem_constant_op0(i64 %x) { +; CHECK-LABEL: @srem_constant_op0( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = srem <2 x i64> , [[INS]] +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 0 + %bo = srem <2 x i64> , %ins + ret <2 x i64> %bo +} + +define <2 x i64> @srem_constant_op0_not_undef_lane(i64 %x) { +; CHECK-LABEL: @srem_constant_op0_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = srem <2 x i64> , [[INS]] +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> 
undef, i64 %x, i32 0 + %bo = srem <2 x i64> , %ins + ret <2 x i64> %bo +} + +define <2 x i64> @srem_constant_op1(i64 %x) { +; CHECK-LABEL: @srem_constant_op1( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 +; CHECK-NEXT: [[BO:%.*]] = srem <2 x i64> [[INS]], +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 1 + %bo = srem <2 x i64> %ins, + ret <2 x i64> %bo +} + +define <2 x i64> @srem_constant_op1_not_undef_lane(i64 %x) { +; CHECK-LABEL: @srem_constant_op1_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 +; CHECK-NEXT: [[BO:%.*]] = srem <2 x i64> [[INS]], +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 1 + %bo = srem <2 x i64> %ins, + ret <2 x i64> %bo +} + +define <2 x i64> @udiv_constant_op0(i64 %x) { +; CHECK-LABEL: @udiv_constant_op0( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = udiv exact <2 x i64> , [[INS]] +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 0 + %bo = udiv exact <2 x i64> , %ins + ret <2 x i64> %bo +} + +define <2 x i64> @udiv_constant_op0_not_undef_lane(i64 %x) { +; CHECK-LABEL: @udiv_constant_op0_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = udiv exact <2 x i64> , [[INS]] +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 0 + %bo = udiv exact <2 x i64> , %ins + ret <2 x i64> %bo +} + +define <2 x i64> @udiv_constant_op1(i64 %x) { +; CHECK-LABEL: @udiv_constant_op1( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 +; CHECK-NEXT: [[BO:%.*]] = udiv <2 x i64> [[INS]], +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 1 + %bo = udiv <2 x i64> %ins, + ret <2 x i64> %bo +} + +define <2 x i64> @udiv_constant_op1_not_undef_lane(i64 %x) { +; CHECK-LABEL: @udiv_constant_op1_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 +; CHECK-NEXT: [[BO:%.*]] = udiv <2 x i64> [[INS]], +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 1 + %bo = udiv <2 x i64> %ins, + ret <2 x i64> %bo +} + +define <2 x i64> @sdiv_constant_op0(i64 %x) { +; CHECK-LABEL: @sdiv_constant_op0( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = sdiv <2 x i64> , [[INS]] +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 0 + %bo = sdiv <2 x i64> , %ins + ret <2 x i64> %bo +} + +define <2 x i64> @sdiv_constant_op0_not_undef_lane(i64 %x) { +; CHECK-LABEL: @sdiv_constant_op0_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = sdiv <2 x i64> , [[INS]] +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 0 + %bo = sdiv <2 x i64> , %ins + ret <2 x i64> %bo +} + +define <2 x i64> @sdiv_constant_op1(i64 %x) { +; CHECK-LABEL: @sdiv_constant_op1( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 +; CHECK-NEXT: [[BO:%.*]] = sdiv exact <2 x i64> [[INS]], +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 1 + %bo = sdiv exact <2 x i64> %ins, + ret <2 x i64> %bo +} + +define <2 x i64> 
@sdiv_constant_op1_not_undef_lane(i64 %x) { +; CHECK-LABEL: @sdiv_constant_op1_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 +; CHECK-NEXT: [[BO:%.*]] = sdiv exact <2 x i64> [[INS]], +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 1 + %bo = sdiv exact <2 x i64> %ins, + ret <2 x i64> %bo +} + +define <2 x i64> @and_constant(i64 %x) { +; CHECK-LABEL: @and_constant( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = and <2 x i64> [[INS]], +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 0 + %bo = and <2 x i64> %ins, + ret <2 x i64> %bo +} + +define <2 x i64> @and_constant_not_undef_lane(i64 %x) { +; CHECK-LABEL: @and_constant_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = and <2 x i64> [[INS]], +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 0 + %bo = and <2 x i64> %ins, + ret <2 x i64> %bo +} + +define <2 x i64> @or_constant(i64 %x) { +; CHECK-LABEL: @or_constant( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 +; CHECK-NEXT: [[BO:%.*]] = or <2 x i64> [[INS]], +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 1 + %bo = or <2 x i64> %ins, + ret <2 x i64> %bo +} + +define <2 x i64> @or_constant_not_undef_lane(i64 %x) { +; CHECK-LABEL: @or_constant_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 +; CHECK-NEXT: [[BO:%.*]] = or <2 x i64> [[INS]], +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 1 + %bo = or <2 x i64> %ins, + ret <2 x i64> %bo +} + +define <2 x i64> @xor_constant(i64 %x) { +; CHECK-LABEL: @xor_constant( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = xor <2 x i64> [[INS]], +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 0 + %bo = xor <2 x i64> %ins, + ret <2 x i64> %bo +} + +define <2 x i64> @xor_constant_not_undef_lane(i64 %x) { +; CHECK-LABEL: @xor_constant_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = xor <2 x i64> [[INS]], +; CHECK-NEXT: ret <2 x i64> [[BO]] +; + %ins = insertelement <2 x i64> undef, i64 %x, i32 0 + %bo = xor <2 x i64> %ins, + ret <2 x i64> %bo +} + +define <2 x double> @fadd_constant(double %x) { +; CHECK-LABEL: @fadd_constant( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = fadd <2 x double> [[INS]], +; CHECK-NEXT: ret <2 x double> [[BO]] +; + %ins = insertelement <2 x double> undef, double %x, i32 0 + %bo = fadd <2 x double> %ins, + ret <2 x double> %bo +} + +define <2 x double> @fadd_constant_not_undef_lane(double %x) { +; CHECK-LABEL: @fadd_constant_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 +; CHECK-NEXT: [[BO:%.*]] = fadd <2 x double> [[INS]], +; CHECK-NEXT: ret <2 x double> [[BO]] +; + %ins = insertelement <2 x double> undef, double %x, i32 1 + %bo = fadd <2 x double> %ins, + ret <2 x double> %bo +} + +define <2 x double> @fsub_constant_op0(double %x) { +; CHECK-LABEL: @fsub_constant_op0( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 +; 
CHECK-NEXT: [[BO:%.*]] = fsub fast <2 x double> , [[INS]] +; CHECK-NEXT: ret <2 x double> [[BO]] +; + %ins = insertelement <2 x double> undef, double %x, i32 0 + %bo = fsub fast <2 x double> , %ins + ret <2 x double> %bo +} + +define <2 x double> @fsub_constant_op0_not_undef_lane(double %x) { +; CHECK-LABEL: @fsub_constant_op0_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 +; CHECK-NEXT: [[BO:%.*]] = fsub nsz <2 x double> , [[INS]] +; CHECK-NEXT: ret <2 x double> [[BO]] +; + %ins = insertelement <2 x double> undef, double %x, i32 1 + %bo = fsub nsz <2 x double> , %ins + ret <2 x double> %bo +} + +define <2 x double> @fsub_constant_op1(double %x) { +; CHECK-LABEL: @fsub_constant_op1( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 +; CHECK-NEXT: [[BO:%.*]] = fsub <2 x double> [[INS]], +; CHECK-NEXT: ret <2 x double> [[BO]] +; + %ins = insertelement <2 x double> undef, double %x, i32 1 + %bo = fsub <2 x double> %ins, + ret <2 x double> %bo +} + +define <2 x double> @fsub_constant_op1_not_undef_lane(double %x) { +; CHECK-LABEL: @fsub_constant_op1_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = fsub <2 x double> [[INS]], +; CHECK-NEXT: ret <2 x double> [[BO]] +; + %ins = insertelement <2 x double> undef, double %x, i32 0 + %bo = fsub <2 x double> %ins, + ret <2 x double> %bo +} + +define <2 x double> @fmul_constant(double %x) { +; CHECK-LABEL: @fmul_constant( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = fmul reassoc <2 x double> [[INS]], +; CHECK-NEXT: ret <2 x double> [[BO]] +; + %ins = insertelement <2 x double> undef, double %x, i32 0 + %bo = fmul reassoc <2 x double> %ins, + ret <2 x double> %bo +} + +define <2 x double> @fmul_constant_not_undef_lane(double %x) { +; CHECK-LABEL: @fmul_constant_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 +; CHECK-NEXT: [[BO:%.*]] = fmul <2 x double> [[INS]], +; CHECK-NEXT: ret <2 x double> [[BO]] +; + %ins = insertelement <2 x double> undef, double %x, i32 1 + %bo = fmul <2 x double> %ins, + ret <2 x double> %bo +} + +define <2 x double> @fdiv_constant_op0(double %x) { +; CHECK-LABEL: @fdiv_constant_op0( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 +; CHECK-NEXT: [[BO:%.*]] = fdiv nnan <2 x double> , [[INS]] +; CHECK-NEXT: ret <2 x double> [[BO]] +; + %ins = insertelement <2 x double> undef, double %x, i32 1 + %bo = fdiv nnan <2 x double> , %ins + ret <2 x double> %bo +} + +define <2 x double> @fdiv_constant_op0_not_undef_lane(double %x) { +; CHECK-LABEL: @fdiv_constant_op0_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = fdiv ninf <2 x double> , [[INS]] +; CHECK-NEXT: ret <2 x double> [[BO]] +; + %ins = insertelement <2 x double> undef, double %x, i32 0 + %bo = fdiv ninf <2 x double> , %ins + ret <2 x double> %bo +} + +define <2 x double> @fdiv_constant_op1(double %x) { +; CHECK-LABEL: @fdiv_constant_op1( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = fdiv <2 x double> [[INS]], +; CHECK-NEXT: ret <2 x double> [[BO]] +; + %ins = insertelement <2 x double> undef, double %x, i32 0 + %bo = fdiv <2 x double> %ins, + ret <2 x double> %bo +} + +define <2 x double> 
@fdiv_constant_op1_not_undef_lane(double %x) { +; CHECK-LABEL: @fdiv_constant_op1_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = fdiv <2 x double> [[INS]], +; CHECK-NEXT: ret <2 x double> [[BO]] +; + %ins = insertelement <2 x double> undef, double %x, i32 0 + %bo = fdiv <2 x double> %ins, + ret <2 x double> %bo +} + +define <2 x double> @frem_constant_op0(double %x) { +; CHECK-LABEL: @frem_constant_op0( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = frem fast <2 x double> , [[INS]] +; CHECK-NEXT: ret <2 x double> [[BO]] +; + %ins = insertelement <2 x double> undef, double %x, i32 0 + %bo = frem fast <2 x double> , %ins + ret <2 x double> %bo +} + +define <2 x double> @frem_constant_op0_not_undef_lane(double %x) { +; CHECK-LABEL: @frem_constant_op0_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 +; CHECK-NEXT: [[BO:%.*]] = frem <2 x double> , [[INS]] +; CHECK-NEXT: ret <2 x double> [[BO]] +; + %ins = insertelement <2 x double> undef, double %x, i32 1 + %bo = frem <2 x double> , %ins + ret <2 x double> %bo +} + +define <2 x double> @frem_constant_op1(double %x) { +; CHECK-LABEL: @frem_constant_op1( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 +; CHECK-NEXT: [[BO:%.*]] = frem ninf <2 x double> [[INS]], +; CHECK-NEXT: ret <2 x double> [[BO]] +; + %ins = insertelement <2 x double> undef, double %x, i32 1 + %bo = frem ninf <2 x double> %ins, + ret <2 x double> %bo +} + +define <2 x double> @frem_constant_op1_not_undef_lane(double %x) { +; CHECK-LABEL: @frem_constant_op1_not_undef_lane( +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 +; CHECK-NEXT: [[BO:%.*]] = frem nnan <2 x double> [[INS]], +; CHECK-NEXT: ret <2 x double> [[BO]] +; + %ins = insertelement <2 x double> undef, double %x, i32 0 + %bo = frem nnan <2 x double> %ins, + ret <2 x double> %bo +} From 4a2673d79fdbae57a800ec578ee3d58a6890a4f9 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 31 May 2020 14:20:00 +0100 Subject: [PATCH 6/6] [X86][AVX] Add SimplifyMultipleUseDemandedBits VBROADCAST handling to SimplifyDemandedVectorElts. As suggested on D79987. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 6 ++++++ llvm/test/CodeGen/X86/oddshuffles.ll | 14 ++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 89559ad9acbda3..7edce21290330c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -36830,6 +36830,12 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO, Depth + 1)) return true; + // Aggressively peek through src to get at the demanded elt. + // TODO - we should do this for all target/faux shuffles ops. 
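+    // (SimplifyMultipleUseDemandedBits does not modify Src or its other
+    // uses; it returns an existing, simpler value that still supplies the
+    // demanded element, and only this broadcast is rewritten to use it.)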
+ APInt SrcBits = APInt::getAllOnesValue(SrcVT.getScalarSizeInBits()); + if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(Src, SrcBits, SrcElts, + TLO.DAG, Depth + 1)) + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc)); break; } case X86ISD::VPERMV: { diff --git a/llvm/test/CodeGen/X86/oddshuffles.ll b/llvm/test/CodeGen/X86/oddshuffles.ll index e182008eadc9ea..910c40d6738356 100644 --- a/llvm/test/CodeGen/X86/oddshuffles.ll +++ b/llvm/test/CodeGen/X86/oddshuffles.ll @@ -2014,18 +2014,16 @@ define <16 x i32> @splat_v3i32(<3 x i32>* %ptr) { ; ; AVX2-SLOW-LABEL: splat_v3i32: ; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; AVX2-SLOW-NEXT: vpinsrd $2, 8(%rdi), %xmm0, %xmm1 -; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpbroadcastd %xmm1, %ymm1 -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1],ymm1[2],ymm2[3,4,5,6,7] +; AVX2-SLOW-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX2-SLOW-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0],ymm1[1],ymm2[2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vbroadcastss %xmm1, %ymm1 +; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1],ymm1[2],ymm2[3,4,5,6,7] ; AVX2-SLOW-NEXT: retq ; ; AVX2-FAST-LABEL: splat_v3i32: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; AVX2-FAST-NEXT: vpinsrd $2, 8(%rdi), %xmm0, %xmm1 +; AVX2-FAST-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero ; AVX2-FAST-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3,4,5,6,7] ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero