Skip to content

Commit

Permalink
[SLP]Fix PR58956: fix insertpoint for reduced buildvector graphs.
Browse files Browse the repository at this point in the history
If the graph is only the buildvector node without a main operation, we need
to inherit the insert point from the reduction instruction. Otherwise the
compiler crashes trying to insert instructions at the entry block.
  • Loading branch information
alexey-bataev committed Nov 16, 2022
1 parent 8fbb6f8 commit 2f8f17c
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 10 deletions.
24 changes: 14 additions & 10 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -970,7 +970,8 @@ class BoUpSLP {
/// Vectorize the tree but with the list of externally used values \p
/// ExternallyUsedValues. Values in this MapVector can be replaced but the
/// generated extractvalue instructions.
/// If \p ReductionRoot is non-null, it is used as the builder's initial
/// insertion point before the root tree entry is vectorized; when it is
/// null (the default), the first instruction of the function's entry block
/// is used instead.
Value *vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues);
Value *vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues,
Instruction *ReductionRoot = nullptr);

/// \returns the cost incurred by unwanted spills and fills, caused by
/// holding live values over call sites.
Expand Down Expand Up @@ -9002,8 +9003,8 @@ struct ShuffledInsertData {
};
} // namespace

Value *
BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
Value *BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues,
Instruction *ReductionRoot) {
// All blocks must be scheduled before any instructions are inserted.
for (auto &BSIter : BlocksSchedules) {
scheduleBlock(BSIter.second.get());
Expand All @@ -9020,7 +9021,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
EntryToLastInstruction.try_emplace(E.get(), LastInst);
}

Builder.SetInsertPoint(&F->getEntryBlock().front());
Builder.SetInsertPoint(ReductionRoot ? ReductionRoot
: &F->getEntryBlock().front());
auto *VectorRoot = vectorizeTree(VectorizableTree[0].get());

// If the vectorized tree can be rewritten in a smaller type, we truncate the
Expand Down Expand Up @@ -11944,16 +11946,18 @@ class HorizontalReduction {

Builder.setFastMathFlags(RdxFMF);

// Vectorize a tree.
Value *VectorizedRoot = V.vectorizeTree(LocalExternallyUsedValues);

// Emit a reduction. If the root is a select (min/max idiom), the insert
// point is the compare condition of that select.
Instruction *RdxRootInst = cast<Instruction>(ReductionRoot);
Instruction *InsertPt = RdxRootInst;
if (IsCmpSelMinMax)
Builder.SetInsertPoint(GetCmpForMinMaxReduction(RdxRootInst));
else
Builder.SetInsertPoint(RdxRootInst);
InsertPt = GetCmpForMinMaxReduction(RdxRootInst);

// Vectorize a tree.
Value *VectorizedRoot =
V.vectorizeTree(LocalExternallyUsedValues, InsertPt);

Builder.SetInsertPoint(InsertPt);

// To prevent poison from leaking across what used to be sequential,
// safe, scalar boolean logic operations, the reduction operand must be
Expand Down
32 changes: 32 additions & 0 deletions llvm/test/Transforms/SLPVectorizer/AArch64/buildvector-reduce.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=slp-vectorizer < %s -mtriple=arm64-apple-macosx | FileCheck %s

; Regression test for PR58956. Every operand of the add chain in for.body is
; the same scalar (%call278), so the reduction is fed by eight copies of one
; value that is defined inside the loop, not in the entry block.
; The expected output replaces the seven scalar adds with a splat of
; %call278 (insertelement + shufflevector) feeding a single
; llvm.vector.reduce.add call, all emitted inside for.body right after the
; call to @fn. Per the commit description, the vectorizer previously crashed
; here because it tried to insert the new instructions at the entry block.
define i8 @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[TMP1:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[CALL278:%.*]] = call i32 @fn(i32 [[SUM]])
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> poison, i32 [[CALL278]], i32 0
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[SHUFFLE]])
; CHECK-NEXT: br label [[FOR_BODY]]
;
entry:
br label %for.body

; Self-looping block: it branches back to itself unconditionally, so the
; function never reaches a return.
for.body:
; Loop-carried value: 0 on entry, then the result of the last add below.
%sum = phi i32 [ %add285.19, %for.body ], [ 0, %entry ]
%call278 = call i32 @fn(i32 %sum)
; Chain of seven adds that sums %call278 eight times in total; this is the
; horizontal reduction whose operands are all the same scalar.
%add285.13 = add i32 %call278, %call278
%add285.14 = add i32 %add285.13, %call278
%add285.15 = add i32 %add285.14, %call278
%add285.16 = add i32 %add285.15, %call278
%add285.17 = add i32 %add285.16, %call278
%add285.18 = add i32 %add285.17, %call278
%add285.19 = add i32 %add285.18, %call278
br label %for.body
}

declare i32 @fn(i32)

0 comments on commit 2f8f17c

Please sign in to comment.