Skip to content

Commit

Permalink
Revert "Revert "[InstCombine] remove identity shuffle simplification …
Browse files Browse the repository at this point in the history
…for mask with undefs""

This reverts commit b48a1ce.

Change-Id: I8f2ed541743e20e0a9304d140013b143394dcf96
  • Loading branch information
jayfoad committed Dec 16, 2019
1 parent 8d1fae4 commit a6cdb66
Show file tree
Hide file tree
Showing 12 changed files with 77 additions and 52 deletions.
24 changes: 24 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1276,6 +1276,30 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt RHSUndefElts(OpWidth, 0);
simplifyAndSetOp(I, 1, RightDemanded, RHSUndefElts);

// If this shuffle does not change the vector length and the elements
// demanded by this shuffle are an identity mask, then this shuffle is
// unnecessary.
//
// We are assuming canonical form for the mask, so the source vector is
// operand 0 and operand 1 is not used.
//
// Note that if an element is demanded and this shuffle mask is undefined
// for that element, then the shuffle is not considered an identity
// operation. The shuffle prevents poison from the operand vector from
// leaking to the result by replacing poison with an undefined value.
if (VWidth == OpWidth) {
bool IsIdentityShuffle = true;
for (unsigned i = 0; i < VWidth; i++) {
unsigned MaskVal = Shuffle->getMaskValue(i);
if (DemandedElts[i] && i != MaskVal) {
IsIdentityShuffle = false;
break;
}
}
if (IsIdentityShuffle)
return Shuffle->getOperand(0);
}

bool NewUndefElts = false;
unsigned LHSIdx = -1u, LHSValIdx = -1u;
unsigned RHSIdx = -1u, RHSValIdx = -1u;
Expand Down
24 changes: 0 additions & 24 deletions llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1390,20 +1390,6 @@ static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
llvm_unreachable("failed to reorder elements of vector instruction!");
}

static void recognizeIdentityMask(const SmallVectorImpl<int> &Mask,
bool &isLHSID, bool &isRHSID) {
isLHSID = isRHSID = true;

for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
if (Mask[i] < 0) continue; // Ignore undef values.
// Is this an identity shuffle of the LHS value?
isLHSID &= (Mask[i] == (int)i);

// Is this an identity shuffle of the RHS value?
isRHSID &= (Mask[i]-e == i);
}
}

// Returns true if the shuffle is extracting a contiguous range of values from
// LHS, for example:
// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
Expand Down Expand Up @@ -1974,16 +1960,6 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (Instruction *I = foldIdentityPaddedShuffles(SVI))
return I;

if (VWidth == LHSWidth) {
// Analyze the shuffle, are the LHS or RHS and identity shuffles?
bool isLHSID, isRHSID;
recognizeIdentityMask(Mask, isLHSID, isRHSID);

// Eliminate identity shuffles.
if (isLHSID) return replaceInstUsesWith(SVI, LHS);
if (isRHSID) return replaceInstUsesWith(SVI, RHS);
}

if (isa<UndefValue>(RHS) && canEvaluateShuffled(LHS, Mask)) {
Value *V = evaluateInDifferentElementOrder(LHS, Mask);
return replaceInstUsesWith(SVI, V);
Expand Down
3 changes: 2 additions & 1 deletion llvm/test/Transforms/InstCombine/X86/x86-avx2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ define <8 x float> @undef_test_vpermps(<8 x float> %a0) {

define <8 x i32> @elts_test_vpermd(<8 x i32> %a0, i32 %a1) {
; CHECK-LABEL: @elts_test_vpermd(
; CHECK-NEXT: ret <8 x i32> [[A0:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: ret <8 x i32> [[TMP1]]
;
%1 = insertelement <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i32 %a1, i32 0
%2 = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %1)
Expand Down
18 changes: 9 additions & 9 deletions llvm/test/Transforms/InstCombine/X86/x86-avx512.ll
Original file line number Diff line number Diff line change
Expand Up @@ -921,8 +921,8 @@ declare float @llvm.fma.f32(float, float, float) #1
define <4 x float> @test_mask_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: @test_mask_vfmadd_ss(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
Expand Down Expand Up @@ -1063,8 +1063,8 @@ define double @test_mask_vfmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x doub
define <4 x float> @test_maskz_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: @test_maskz_vfmadd_ss(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
Expand Down Expand Up @@ -1202,8 +1202,8 @@ define double @test_maskz_vfmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x dou

define <4 x float> @test_mask3_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: @test_mask3_vfmadd_ss(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
Expand Down Expand Up @@ -1342,8 +1342,8 @@ define double @test_mask3_vfmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x dou

define <4 x float> @test_mask3_vfmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; CHECK-LABEL: @test_mask3_vfmsub_ss(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = fsub float -0.000000e+00, [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP4]])
Expand Down Expand Up @@ -1544,7 +1544,7 @@ define <4 x float> @test_mask3_vfnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x f
; CHECK-LABEL: @test_mask3_vfnmsub_ss(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = fsub float -0.000000e+00, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = fsub float -0.000000e+00, [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP2]], float [[TMP3]], float [[TMP5]])
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/InstCombine/X86/x86-fma.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define <4 x float> @test_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: @test_vfmadd_ss(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[A]], float [[TMP4]], i64 0
; CHECK-NEXT: ret <4 x float> [[TMP5]]
Expand Down
9 changes: 6 additions & 3 deletions llvm/test/Transforms/InstCombine/X86/x86-pack.ll
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,8 @@ define <8 x i16> @elts_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
define <8 x i16> @elts_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @elts_packusdw_128(
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]])
; CHECK-NEXT: ret <8 x i16> [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 undef>
; CHECK-NEXT: ret <8 x i16> [[TMP2]]
;
%1 = insertelement <4 x i32> %a0, i32 0, i32 0
%2 = insertelement <4 x i32> %a1, i32 0, i32 3
Expand Down Expand Up @@ -255,7 +256,8 @@ define <16 x i8> @elts_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
define <16 x i16> @elts_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @elts_packssdw_256(
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[A0:%.*]], <8 x i32> undef)
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> undef, <16 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: ret <16 x i16> [[TMP2]]
;
%1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 undef, i32 6, i32 5, i32 undef>
Expand Down Expand Up @@ -303,7 +305,8 @@ define <32 x i8> @elts_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) {
define <32 x i16> @elts_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @elts_packssdw_512(
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A0:%.*]], <16 x i32> undef)
; CHECK-NEXT: ret <32 x i16> [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> undef, <32 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 24, i32 undef, i32 undef, i32 27, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: ret <32 x i16> [[TMP2]]
;
%1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 9, i32 8, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 undef, i32 6, i32 5, i32 undef, i32 undef, i32 10, i32 9, i32 undef, i32 undef, i32 14, i32 13, i32 undef>
Expand Down
3 changes: 2 additions & 1 deletion llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll
Original file line number Diff line number Diff line change
Expand Up @@ -486,7 +486,8 @@ define <16 x i8> @demanded_elts_insertion(<16 x i8> %InVec, <16 x i8> %BaseMask,
define <32 x i8> @demanded_elts_insertion_avx2(<32 x i8> %InVec, <32 x i8> %BaseMask, i8 %M0, i8 %M22) {
; CHECK-LABEL: @demanded_elts_insertion_avx2(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[INVEC:%.*]], <32 x i8> [[BASEMASK:%.*]])
; CHECK-NEXT: ret <32 x i8> [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> undef, <32 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 undef, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT: ret <32 x i8> [[TMP2]]
;
%1 = insertelement <32 x i8> %BaseMask, i8 %M0, i32 0
%2 = insertelement <32 x i8> %1, i8 %M22, i32 22
Expand Down
20 changes: 16 additions & 4 deletions llvm/test/Transforms/InstCombine/X86/x86-sse4a.ll
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@ define <2 x i64> @test_extrq_zero_arg0(<2 x i64> %x, <16 x i8> %y) {

define <2 x i64> @test_extrq_zero_arg1(<2 x i64> %x, <16 x i8> %y) {
; CHECK-LABEL: @test_extrq_zero_arg1(
; CHECK-NEXT: ret <2 x i64> [[X:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
;
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> zeroinitializer) nounwind
ret <2 x i64> %1
Expand Down Expand Up @@ -57,7 +60,10 @@ define <2 x i64> @test_extrq_constant_undef(<2 x i64> %x, <16 x i8> %y) {

define <2 x i64> @test_extrq_call_constexpr(<2 x i64> %x) {
; CHECK-LABEL: @test_extrq_call_constexpr(
; CHECK-NEXT: ret <2 x i64> [[X:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
;
%1 = call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> bitcast (<2 x i64> <i64 0, i64 undef> to <16 x i8>))
ret <2 x i64> %1
Expand Down Expand Up @@ -235,15 +241,21 @@ define <2 x i64> @test_insertqi_call_constexpr(<2 x i64> %x) {
; second arg
define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) {
; CHECK-LABEL: @testInsert64Bits(
; CHECK-NEXT: ret <2 x i64> [[I:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[I:%.*]] to <16 x i8>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
;
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0)
ret <2 x i64> %1
}

define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) {
; CHECK-LABEL: @testZeroLength(
; CHECK-NEXT: ret <2 x i64> [[I:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[I:%.*]] to <16 x i8>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
;
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0)
ret <2 x i64> %1
Expand Down
9 changes: 6 additions & 3 deletions llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,8 @@ define <8 x double> @undef_test_vpermilvar_pd_512(<8 x double> %v) {

define <4 x float> @elts_test_vpermilvar_ps(<4 x float> %a0, i32 %a1) {
; CHECK-LABEL: @elts_test_vpermilvar_ps(
; CHECK-NEXT: ret <4 x float> [[A0:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %a1, i32 3
%2 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %1)
Expand All @@ -247,7 +248,8 @@ define <8 x float> @elts_test_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1)
define <16 x float> @elts_test_vpermilvar_ps_512(<16 x float> %a0, <16 x i32> %a1, i32 %a2) {
; CHECK-LABEL: @elts_test_vpermilvar_ps_512(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[A0:%.*]], <16 x i32> [[A1:%.*]])
; CHECK-NEXT: ret <16 x float> [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> undef, <16 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <16 x float> [[TMP2]]
;
%1 = insertelement <16 x i32> %a1, i32 %a2, i32 0
%2 = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %a0, <16 x i32> %1)
Expand All @@ -257,7 +259,8 @@ define <16 x float> @elts_test_vpermilvar_ps_512(<16 x float> %a0, <16 x i32> %a

define <2 x double> @elts_test_vpermilvar_pd(<2 x double> %a0, i64 %a1) {
; CHECK-LABEL: @elts_test_vpermilvar_pd(
; CHECK-NEXT: ret <2 x double> [[A0:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A0:%.*]], <2 x double> undef, <2 x i32> <i32 0, i32 undef>
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = insertelement <2 x i64> <i64 0, i64 2>, i64 %a1, i32 1
%2 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %1)
Expand Down
3 changes: 2 additions & 1 deletion llvm/test/Transforms/InstCombine/shuffle_select.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1458,7 +1458,8 @@ define <4 x i8> @or_add_2_vars(<4 x i8> %v, <4 x i8> %v1) {

define <4 x i32> @PR41419(<4 x i32> %v) {
; CHECK-LABEL: @PR41419(
; CHECK-NEXT: ret <4 x i32> [[V:%.*]]
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> undef, <4 x i32> <i32 undef, i32 undef, i32 2, i32 undef>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
%s = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
ret <4 x i32> %s
Expand Down
9 changes: 6 additions & 3 deletions llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,8 @@ define <4 x i32> @inselt_shuf_no_demand_multiuse(i32 %a0, i32 %a1, <4 x i32> %b)
; CHECK-NEXT: [[OUT0:%.*]] = insertelement <4 x i32> undef, i32 [[A0:%.*]], i32 0
; CHECK-NEXT: [[OUT01:%.*]] = insertelement <4 x i32> [[OUT0]], i32 [[A1:%.*]], i32 1
; CHECK-NEXT: [[FOO:%.*]] = add <4 x i32> [[OUT01]], [[B:%.*]]
; CHECK-NEXT: ret <4 x i32> [[FOO]]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[FOO]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: ret <4 x i32> [[SHUFFLE]]
;
%out0 = insertelement <4 x i32> undef, i32 %a0, i32 0
%out01 = insertelement <4 x i32> %out0, i32 %a1, i32 1
Expand All @@ -189,7 +190,8 @@ define <4 x i32> @inselt_shuf_no_demand_multiuse(i32 %a0, i32 %a1, <4 x i32> %b)
define <4 x float> @inselt_shuf_no_demand_bogus_insert_index_in_chain(float %a1, float %a2, float %a3, i32 %variable_index) {
; CHECK-LABEL: @inselt_shuf_no_demand_bogus_insert_index_in_chain(
; CHECK-NEXT: [[OUT12:%.*]] = insertelement <4 x float> undef, float [[A2:%.*]], i32 [[VARIABLE_INDEX:%.*]]
; CHECK-NEXT: ret <4 x float> [[OUT12]]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[OUT12]], <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: ret <4 x float> [[SHUFFLE]]
;
%out1 = insertelement <4 x float> undef, float %a1, i32 1
%out12 = insertelement <4 x float> %out1, float %a2, i32 %variable_index ; something unexpected
Expand All @@ -214,7 +216,8 @@ define <3 x i8> @shuf_add(<3 x i8> %x) {
define <3 x i8> @shuf_sub(<3 x i8> %x) {
; CHECK-LABEL: @shuf_sub(
; CHECK-NEXT: [[BO:%.*]] = sub <3 x i8> <i8 1, i8 undef, i8 3>, [[X:%.*]]
; CHECK-NEXT: ret <3 x i8> [[BO]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> <i32 0, i32 undef, i32 2>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%bo = sub nuw <3 x i8> <i8 1, i8 2, i8 3>, %x
%r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> <i32 0, i32 undef, i32 2>
Expand Down
3 changes: 2 additions & 1 deletion llvm/test/Transforms/InstCombine/vec_shuffle.ll
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ define float @test6(<4 x float> %X) {

define <4 x float> @test7(<4 x float> %x) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: ret <4 x float> [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: ret <4 x float> [[R]]
;
%r = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> < i32 0, i32 1, i32 6, i32 7 >
ret <4 x float> %r
Expand Down

0 comments on commit a6cdb66

Please sign in to comment.