Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FuncSpec] Update function specialization to handle phi-chains #72903

Merged
merged 3 commits into from
Nov 22, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,10 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
Cost estimateSwitchInst(SwitchInst &I);
Cost estimateBranchInst(BranchInst &I);

bool discoverTransitivelyIncomingValues(
momchil-velikov marked this conversation as resolved.
Show resolved Hide resolved
Constant *Const, PHINode *Root, DenseSet<PHINode *> &TransitivePHIs,
SmallVectorImpl<PHINode *> &UnknownIncomingValues);

Constant *visitInstruction(Instruction &I) { return nullptr; }
Constant *visitPHINode(PHINode &I);
Constant *visitFreezeInst(FreezeInst &I);
Expand Down
119 changes: 105 additions & 14 deletions llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,17 @@ static cl::opt<unsigned> MaxClones(
"The maximum number of clones allowed for a single function "
"specialization"));

static cl::opt<unsigned>
MaxDiscoveryIterations("funcspec-max-discovery-iterations", cl::init(100),
cl::Hidden,
cl::desc("The maximum number of iterations allowed "
"when searching for transitive "
"phis"));

static cl::opt<unsigned> MaxIncomingPhiValues(
"funcspec-max-incoming-phi-values", cl::init(4), cl::Hidden, cl::desc(
"The maximum number of incoming values a PHI node can have to be "
"considered during the specialization bonus estimation"));
"funcspec-max-incoming-phi-values", cl::init(8), cl::Hidden,
cl::desc("The maximum number of incoming values a PHI node can have to be "
"considered during the specialization bonus estimation"));

static cl::opt<unsigned> MaxBlockPredecessors(
"funcspec-max-block-predecessors", cl::init(2), cl::Hidden, cl::desc(
Expand All @@ -64,9 +71,9 @@ static cl::opt<unsigned> MinCodeSizeSavings(
"much percent of the original function size"));

static cl::opt<unsigned> MinLatencySavings(
"funcspec-min-latency-savings", cl::init(70), cl::Hidden, cl::desc(
"Reject specializations whose latency savings are less than this"
"much percent of the original function size"));
"funcspec-min-latency-savings", cl::init(40), cl::Hidden,
cl::desc("Reject specializations whose latency savings are less than this"
"much percent of the original function size"));

static cl::opt<unsigned> MinInliningBonus(
"funcspec-min-inlining-bonus", cl::init(300), cl::Hidden, cl::desc(
Expand Down Expand Up @@ -262,29 +269,113 @@ Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
return estimateBasicBlocks(WorkList);
}

bool InstCostVisitor::discoverTransitivelyIncomingValues(
Constant *Const, PHINode *Root, DenseSet<PHINode *> &TransitivePHIs,
SmallVectorImpl<PHINode *> &UnknownIncomingValues) {

SmallVector<PHINode *, 64> WorkList;
WorkList.push_back(Root);
unsigned Iter = 0;

while (!WorkList.empty()) {
PHINode *PN = WorkList.pop_back_val();

if (++Iter > MaxDiscoveryIterations ||
PN->getNumIncomingValues() > MaxIncomingPhiValues) {
// For now just collect the Phi and later we will check whether it is
// in the Transitive set.
UnknownIncomingValues.push_back(PN);
continue;
// FIXME: return false here and remove the UnknownIncomingValues entirely.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's implement FIXME actually. It might not be hard.

Copy link
Collaborator

@labrinea labrinea Nov 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will require the regresson test to be adjusted for the NOFUNCSPEC check prefix. Last time I checked it would require another few discovery steps. But that's fine because we will be removing the following loop from visitPHI, which iterates over these undiscovered UnknownIncomingValues.

for (PHINode *Phi : UnknownIncomingValues)
    if (!TransitivePHIs.contains(Phi))
      return nullptr;

ps: don't forget to remove the redundant UnknownIncomingValues parameter

}

if (!TransitivePHIs.insert(PN).second)
continue;

for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
Value *V = PN->getIncomingValue(I);

// Disregard self-references and dead incoming values.
if (auto *Inst = dyn_cast<Instruction>(V))
if (Inst == PN || DeadBlocks.contains(PN->getIncomingBlock(I)))
continue;

if (Constant *C = findConstantFor(V, KnownConstants)) {
// Not all incoming values are the same constant. Bail immediately.
if (C != Const)
return false;
continue;
}

if (auto *Phi = dyn_cast<PHINode>(V)) {
WorkList.push_back(Phi);
continue;
}

// We can't reason about anything else.
return false;
}
}
return true;
}

Constant *InstCostVisitor::visitPHINode(PHINode &I) {
if (I.getNumIncomingValues() > MaxIncomingPhiValues)
return nullptr;

bool Inserted = VisitedPHIs.insert(&I).second;
SmallVector<PHINode *, 8> UnknownIncomingValues;
DenseSet<PHINode *> TransitivePHIs;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's put declaration close to their uses.

Constant *Const = nullptr;
bool HaveSeenIncomingPHI = false;

for (unsigned Idx = 0, E = I.getNumIncomingValues(); Idx != E; ++Idx) {
Value *V = I.getIncomingValue(Idx);

// Disregard self-references and dead incoming values.
if (auto *Inst = dyn_cast<Instruction>(V))
if (Inst == &I || DeadBlocks.contains(I.getIncomingBlock(Idx)))
continue;
Constant *C = findConstantFor(V, KnownConstants);
if (!C) {
if (Inserted)
PendingPHIs.push_back(&I);
return nullptr;

if (Constant *C = findConstantFor(V, KnownConstants)) {
if (!Const)
Const = C;
// Not all incoming values are the same constant. Bail immediately.
if (C != Const)
return nullptr;
continue;
}
if (!Const)
Const = C;
else if (C != Const)

if (Inserted) {
// First time we are seeing this phi. We will retry later, after
// all the constant arguments have been propagated. Bail for now.
PendingPHIs.push_back(&I);
return nullptr;
}

if (isa<PHINode>(V)) {
// Perhaps it is a Transitive Phi. We will confirm later.
HaveSeenIncomingPHI = true;
continue;
}

// We can't reason about anything else.
return nullptr;
}

assert(Const && "Should have found at least one constant incoming value");
momchil-velikov marked this conversation as resolved.
Show resolved Hide resolved

if (!HaveSeenIncomingPHI)
return Const;

if (!discoverTransitivelyIncomingValues(Const, &I, TransitivePHIs,
UnknownIncomingValues))
return nullptr;

for (PHINode *Phi : UnknownIncomingValues)
if (!TransitivePHIs.contains(Phi))
return nullptr;

return Const;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
;
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=20 -funcspec-for-literal-constant -S < %s | FileCheck %s --check-prefix=FUNCSPEC
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=20 -funcspec-for-literal-constant -funcspec-max-discovery-iterations=11 -S < %s | FileCheck %s --check-prefix=NOFUNCSPEC

define i64 @bar(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10) {
; FUNCSPEC-LABEL: define i64 @bar(
; FUNCSPEC-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i1 [[C5:%.*]], i1 [[C6:%.*]], i1 [[C7:%.*]], i1 [[C8:%.*]], i1 [[C9:%.*]], i1 [[C10:%.*]]) {
; FUNCSPEC-NEXT: entry:
; FUNCSPEC-NEXT: [[F1:%.*]] = call i64 @foo.specialized.1(i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0:![0-9]+]]
; FUNCSPEC-NEXT: [[F2:%.*]] = call i64 @foo.specialized.2(i64 4, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG1:![0-9]+]]
; FUNCSPEC-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[F1]], [[F2]]
; FUNCSPEC-NEXT: ret i64 [[ADD]]
;
; NOFUNCSPEC-LABEL: define i64 @bar(
; NOFUNCSPEC-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i1 [[C5:%.*]], i1 [[C6:%.*]], i1 [[C7:%.*]], i1 [[C8:%.*]], i1 [[C9:%.*]], i1 [[C10:%.*]]) {
; NOFUNCSPEC-NEXT: entry:
; NOFUNCSPEC-NEXT: [[F1:%.*]] = call i64 @foo(i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0:![0-9]+]]
; NOFUNCSPEC-NEXT: [[F2:%.*]] = call i64 @foo(i64 4, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0]]
; NOFUNCSPEC-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[F1]], [[F2]]
; NOFUNCSPEC-NEXT: ret i64 [[ADD]]
;
entry:
%f1 = call i64 @foo(i64 3, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10)
%f2 = call i64 @foo(i64 4, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10)
%add = add i64 %f1, %f2
ret i64 %add
}

define internal i64 @foo(i64 %n, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10) {
entry:
br i1 %c1, label %l1, label %l9

l1:
%phi1 = phi i64 [ %n, %entry ], [ %phi2, %l2 ]
%add = add i64 %phi1, 1
%div = sdiv i64 %add, 2
br i1 %c2, label %l1_5, label %exit

l1_5:
br i1 %c3, label %l1_75, label %l6

l1_75:
br i1 %c4, label %l2, label %l3

l2:
%phi2 = phi i64 [ %phi1, %l1_75 ], [ %phi3, %l3 ]
br label %l1

l3:
%phi3 = phi i64 [ %phi1, %l1_75 ], [ %phi4, %l4 ]
br label %l2

l4:
%phi4 = phi i64 [ %phi5, %l5 ], [ %phi6, %l6 ]
br i1 %c5, label %l3, label %l6

l5:
%phi5 = phi i64 [ %phi6, %l6_5 ], [ %phi7, %l7 ]
br label %l4

l6:
%phi6 = phi i64 [ %phi4, %l4 ], [ %phi1, %l1_5 ]
br i1 %c6, label %l4, label %l6_5

l6_5:
br i1 %c7, label %l5, label %l8

l7:
%phi7 = phi i64 [ %phi9, %l9 ], [ %phi8, %l8 ]
br i1 %c8, label %l5, label %l8

l8:
%phi8 = phi i64 [ %phi6, %l6_5 ], [ %phi7, %l7 ]
br i1 %c9, label %l7, label %l9

l9:
%phi9 = phi i64 [ %n, %entry ], [ %phi8, %l8 ]
%sub = sub i64 %phi9, 1
%mul = mul i64 %sub, 2
br i1 %c10, label %l7, label %exit

exit:
%res = phi i64 [ %div, %l1 ], [ %mul, %l9]
ret i64 %res
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=20 -funcspec-for-literal-constant -S < %s
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you checked -funcspec-min-function-size=20 is the "size" of bar? The function seems smaller than the previous example just above. I would expect a lower value would allow specialization. If you have checked that the test is passing disregard this comment.


define i64 @bar(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i64 %x1) {
; CHECK-LABEL: define i64 @bar(
; CHECK-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i64 [[X1:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[F1:%.*]] = call i64 @foo(i64 3, i64 4, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]])
; CHECK-NEXT: [[F2:%.*]] = call i64 @foo(i64 4, i64 [[X1]], i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]])
; CHECK-NEXT: [[F3:%.*]] = call i64 @foo.specialized.1(i64 3, i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]])
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[F1]], [[F2]]
; CHECK-NEXT: [[ADD2:%.*]] = add i64 [[ADD]], [[F3]]
; CHECK-NEXT: ret i64 [[ADD2]]
;
entry:
%f1 = call i64 @foo(i64 3, i64 4, i1 %c1, i1 %c2, i1 %c3, i1 %c4)
%f2 = call i64 @foo(i64 4, i64 %x1, i1 %c1, i1 %c2, i1 %c3, i1 %c4)
%f3 = call i64 @foo(i64 3, i64 3, i1 %c1, i1 %c2, i1 %c3, i1 %c4)
%add = add i64 %f1, %f2
%add2 = add i64 %add, %f3
ret i64 %add2
}

define internal i64 @foo(i64 %n, i64 %m, i1 %c1, i1 %c2, i1 %c3, i1 %c4) {
entry:
br i1 %c1, label %l1, label %l4

l1:
%phi1 = phi i64 [ %n, %entry ], [ %phi2, %l2 ]
%add = add i64 %phi1, 1
%div = sdiv i64 %add, 2
br i1 %c2, label %l1_5, label %exit

l1_5:
br i1 %c3, label %l2, label %l3

l2:
%phi2 = phi i64 [ %phi1, %l1_5 ], [ %phi3, %l3 ]
br label %l1

l3:
%phi3 = phi i64 [ %phi1, %l1_5 ], [ %m, %l4 ]
br i1 %c2, label %l4, label %l2

l4:
%phi4 = phi i64 [ %n, %entry ], [ %phi3, %l3 ]
%sub = sub i64 %phi4, 1
%mul = mul i64 %sub, 2
br i1 %c4, label %l3, label %exit

exit:
%res = phi i64 [ %div, %l1 ], [ %mul, %l4]
ret i64 %res
}