Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[VectorCombine] Fold "shuffle (binop (shuffle, shuffle)), undef" --> "binop (shuffle), (shuffle)" #114101

Merged
merged 2 commits into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions llvm/lib/Transforms/Vectorize/VectorCombine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ class VectorCombine {
bool foldExtractedCmps(Instruction &I);
bool foldSingleElementStore(Instruction &I);
bool scalarizeLoadExtract(Instruction &I);
bool foldPermuteOfBinops(Instruction &I);
bool foldShuffleOfBinops(Instruction &I);
bool foldShuffleOfCastops(Instruction &I);
bool foldShuffleOfShuffles(Instruction &I);
Expand Down Expand Up @@ -1400,6 +1401,100 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
return true;
}

/// Try to convert "shuffle (binop (shuffle, shuffle)), undef"
/// --> "binop (shuffle), (shuffle)".
bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
BinaryOperator *BinOp;
ArrayRef<int> OuterMask;
if (!match(&I,
m_Shuffle(m_OneUse(m_BinOp(BinOp)), m_Undef(), m_Mask(OuterMask))))
return false;

// Don't introduce poison into div/rem.
if (BinOp->isIntDivRem() && llvm::is_contained(OuterMask, PoisonMaskElem))
return false;

Value *Op00, *Op01;
ArrayRef<int> Mask0;
if (!match(BinOp->getOperand(0),
m_OneUse(m_Shuffle(m_Value(Op00), m_Value(Op01), m_Mask(Mask0)))))
return false;

Value *Op10, *Op11;
ArrayRef<int> Mask1;
if (!match(BinOp->getOperand(1),
m_OneUse(m_Shuffle(m_Value(Op10), m_Value(Op11), m_Mask(Mask1)))))
return false;

Instruction::BinaryOps Opcode = BinOp->getOpcode();
auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
auto *BinOpTy = dyn_cast<FixedVectorType>(BinOp->getType());
auto *Op0Ty = dyn_cast<FixedVectorType>(Op00->getType());
auto *Op1Ty = dyn_cast<FixedVectorType>(Op10->getType());
if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
return false;

unsigned NumSrcElts = BinOpTy->getNumElements();

// Don't accept shuffles that reference the second operand in
// div/rem or if its an undef arg.
if ((BinOp->isIntDivRem() || !isa<PoisonValue>(I.getOperand(1))) &&
any_of(OuterMask, [NumSrcElts](int M) { return M >= (int)NumSrcElts; }))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you need to check that the second shuffled value is poison, not undef. For undef it might be unsafe

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will prevent the folds for DivRem with both Undef/Poison - using the m_Undef() match in the m_Shuffle() above - is that not enough?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

m_Undef checks for both undef and poison, I assume. You can safely drop this for poison only, for undefs need to check the the first operand does not produce poisons, I think

return false;

// Merge outer / inner shuffles.
SmallVector<int> NewMask0, NewMask1;
for (int M : OuterMask) {
if (M < 0 || M >= (int)NumSrcElts) {
NewMask0.push_back(PoisonMaskElem);
NewMask1.push_back(PoisonMaskElem);
} else {
NewMask0.push_back(Mask0[M]);
NewMask1.push_back(Mask1[M]);
}
}

// Try to merge shuffles across the binop if the new shuffles are not costly.
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;

InstructionCost OldCost =
TTI.getArithmeticInstrCost(Opcode, BinOpTy, CostKind) +
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, BinOpTy,
OuterMask, CostKind, 0, nullptr, {BinOp}, &I) +
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, Op0Ty, Mask0,
CostKind, 0, nullptr, {Op00, Op01},
cast<Instruction>(BinOp->getOperand(0))) +
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, Op1Ty, Mask1,
CostKind, 0, nullptr, {Op10, Op11},
cast<Instruction>(BinOp->getOperand(1)));

InstructionCost NewCost =
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, Op0Ty, NewMask0,
CostKind, 0, nullptr, {Op00, Op01}) +
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, Op1Ty, NewMask1,
CostKind, 0, nullptr, {Op10, Op11}) +
TTI.getArithmeticInstrCost(Opcode, ShuffleDstTy, CostKind);

LLVM_DEBUG(dbgs() << "Found a shuffle feeding a shuffled binop: " << I
<< "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
<< "\n");
if (NewCost >= OldCost)
return false;

Value *Shuf0 = Builder.CreateShuffleVector(Op00, Op01, NewMask0);
Value *Shuf1 = Builder.CreateShuffleVector(Op10, Op11, NewMask1);
Value *NewBO = Builder.CreateBinOp(Opcode, Shuf0, Shuf1);

// Intersect flags from the old binops.
if (auto *NewInst = dyn_cast<Instruction>(NewBO))
NewInst->copyIRFlags(BinOp);

Worklist.pushValue(Shuf0);
Worklist.pushValue(Shuf1);
replaceValue(I, *NewBO);
return true;
}

/// Try to convert "shuffle (binop), (binop)" into "binop (shuffle), (shuffle)".
bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
BinaryOperator *B0, *B1;
Expand Down Expand Up @@ -2736,6 +2831,7 @@ bool VectorCombine::run() {
MadeChange |= foldInsExtFNeg(I);
break;
case Instruction::ShuffleVector:
MadeChange |= foldPermuteOfBinops(I);
MadeChange |= foldShuffleOfBinops(I);
MadeChange |= foldShuffleOfCastops(I);
MadeChange |= foldShuffleOfShuffles(I);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,10 @@ define <8 x float> @hadd_reverse_v8f32(<8 x float> %a, <8 x float> %b) #0 {

define <8 x float> @reverse_hadd_v8f32(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: @reverse_hadd_v8f32(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 14, i32 12, i32 6, i32 4, i32 10, i32 8, i32 2, i32 0>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 15, i32 13, i32 7, i32 5, i32 11, i32 9, i32 3, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <8 x float> [[SHUFFLE]]
; CHECK-NEXT: ret <8 x float> [[TMP3]]
;
%vecext = extractelement <8 x float> %a, i32 0
%vecext1 = extractelement <8 x float> %a, i32 1
Expand Down
7 changes: 3 additions & 4 deletions llvm/test/Transforms/PhaseOrdering/X86/horiz-math.ll
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,10 @@ define <8 x float> @hadd_reverse_v8f32(<8 x float> %a, <8 x float> %b) #0 {

define <8 x float> @reverse_hadd_v8f32(<8 x float> %a, <8 x float> %b) #0 {
; CHECK-LABEL: @reverse_hadd_v8f32(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 14, i32 12, i32 6, i32 4, i32 10, i32 8, i32 2, i32 0>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 15, i32 13, i32 7, i32 5, i32 11, i32 9, i32 3, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <8 x float> [[SHUFFLE]]
; CHECK-NEXT: ret <8 x float> [[TMP3]]
;
%vecext = extractelement <8 x float> %a, i32 0
%vecext1 = extractelement <8 x float> %a, i32 1
Expand Down
7 changes: 3 additions & 4 deletions llvm/test/Transforms/PhaseOrdering/X86/pr50392.ll
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,9 @@ define <4 x double> @PR50392(<4 x double> %a, <4 x double> %b) {
; AVX1-NEXT: ret <4 x double> [[SHUFFLE]]
;
; AVX2-LABEL: @PR50392(
; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
; AVX2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 4, i32 poison>
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 5, i32 poison>
; AVX2-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
; AVX2-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
; AVX2-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[B]], [[SHIFT]]
; AVX2-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> [[TMP5]], <4 x i32> <i32 0, i32 poison, i32 2, i32 6>
Expand Down
21 changes: 13 additions & 8 deletions llvm/test/Transforms/PhaseOrdering/X86/pr94546.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,18 @@ define <4 x double> @PR94546(<4 x double> %a, <4 x double> %b) {
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 1>
; SSE-NEXT: ret <4 x double> [[TMP4]]
;
; AVX-LABEL: @PR94546(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 6>
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 7>
; AVX-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; AVX-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 1>
; AVX-NEXT: ret <4 x double> [[TMP4]]
; AVX1-LABEL: @PR94546(
; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 6>
; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 7>
; AVX1-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
; AVX1-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 1>
; AVX1-NEXT: ret <4 x double> [[TMP4]]
;
; AVX2-LABEL: @PR94546(
; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 poison, i32 6>
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 poison, i32 7>
; AVX2-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
; AVX2-NEXT: ret <4 x double> [[TMP3]]
;
%vecext = extractelement <4 x double> %a, i32 0
%vecext1 = extractelement <4 x double> %a, i32 1
Expand All @@ -43,5 +49,4 @@ define <4 x double> @PR94546(<4 x double> %a, <4 x double> %b) {
ret <4 x double> %shuffle
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; AVX1: {{.*}}
; AVX2: {{.*}}
; AVX: {{.*}}
Original file line number Diff line number Diff line change
Expand Up @@ -937,10 +937,9 @@ define <4 x i64> @cast_mismatched_types(<4 x i32> %x) {

define <4 x float> @fadd_mismatched_types(<4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @fadd_mismatched_types(
; CHECK-NEXT: [[SHUF_X:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[SHUF_Y:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> poison, <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[FADD:%.*]] = fadd fast <2 x float> [[SHUF_X]], [[SHUF_Y]]
; CHECK-NEXT: [[EXTSHUF:%.*]] = shufflevector <2 x float> [[FADD]], <2 x float> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 2, i32 poison>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 3, i32 poison>
; CHECK-NEXT: [[EXTSHUF:%.*]] = fadd fast <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <4 x float> [[EXTSHUF]]
;
%shuf.x = shufflevector <4 x float> %x, <4 x float> poison, <2 x i32> <i32 0, i32 2>
Expand Down
28 changes: 12 additions & 16 deletions llvm/test/Transforms/VectorCombine/X86/permute-of-binops.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,10 @@ declare void @use_v4f64(<4 x double>)
define <4 x double> @fadd_v4f64(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: define <4 x double> @fadd_v4f64(
; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[POST:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[POST1:%.*]] = shufflevector <4 x double> [[POST]], <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT: ret <4 x double> [[POST1]]
; CHECK-NEXT: ret <4 x double> [[POST]]
;
%a1 = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%b1 = shufflevector <4 x double> %b, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
Expand All @@ -25,11 +24,10 @@ define <4 x double> @fadd_v4f64(<4 x double> %a, <4 x double> %b) {
define <4 x double> @fadd_v4f64_poison_idx(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: define <4 x double> @fadd_v4f64_poison_idx(
; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 poison>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 poison>
; CHECK-NEXT: [[POST:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[POST1:%.*]] = shufflevector <4 x double> [[POST]], <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 4>
; CHECK-NEXT: ret <4 x double> [[POST1]]
; CHECK-NEXT: ret <4 x double> [[POST]]
;
%a1 = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%b1 = shufflevector <4 x double> %b, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
Expand All @@ -41,11 +39,10 @@ define <4 x double> @fadd_v4f64_poison_idx(<4 x double> %a, <4 x double> %b) {
define <4 x double> @fadd_mixed_types(<4 x double> %a, <2 x double> %b) {
; CHECK-LABEL: define <4 x double> @fadd_mixed_types(
; CHECK-SAME: <4 x double> [[A:%.*]], <2 x double> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[POST:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[POST1:%.*]] = shufflevector <4 x double> [[POST]], <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT: ret <4 x double> [[POST1]]
; CHECK-NEXT: ret <4 x double> [[POST]]
;
%a1 = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%b1 = shufflevector <2 x double> %b, <2 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
Expand Down Expand Up @@ -95,11 +92,10 @@ define <4 x double> @fadd_v4f64_multiuse_shuffle(<4 x double> %a, <4 x double> %
define <4 x i32> @sdiv_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: define <4 x i32> @sdiv_v4i32(
; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[POST:%.*]] = sdiv <4 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[POST1:%.*]] = shufflevector <4 x i32> [[POST]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 0>
; CHECK-NEXT: ret <4 x i32> [[POST1]]
; CHECK-NEXT: ret <4 x i32> [[POST]]
;
%a1 = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%b1 = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
Expand Down
Loading