diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h index 50f9aae73dc53e..b001771951e0fe 100644 --- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h @@ -217,6 +217,22 @@ class InstCostVisitor : public InstVisitor { Cost estimateSwitchInst(SwitchInst &I); Cost estimateBranchInst(BranchInst &I); + // Transitively Incoming Values (TIV) is a set of Values that can "feed" a + // value to the initial PHI-node. It is defined like this: + // + // * the initial PHI-node belongs to TIV. + // + // * for every PHI-node in TIV, its operands belong to TIV. + // + // If TIV for the initial PHI-node (P) contains two different constants or a + // non-constant value that is not a PHI-node, P cannot be folded to a constant. + // + // As soon as we detect these cases, we bail, without constructing the + // full TIV. + // Otherwise P can be folded to the one constant in TIV.
+ bool discoverTransitivelyIncomingValues(Constant *Const, PHINode *Root, + DenseSet &TransitivePHIs); + Constant *visitInstruction(Instruction &I) { return nullptr; } Constant *visitPHINode(PHINode &I); Constant *visitFreezeInst(FreezeInst &I); diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp index b75ca7761a60b6..a4c12006ee2433 100644 --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -39,10 +39,17 @@ static cl::opt MaxClones( "The maximum number of clones allowed for a single function " "specialization")); +static cl::opt + MaxDiscoveryIterations("funcspec-max-discovery-iterations", cl::init(100), + cl::Hidden, + cl::desc("The maximum number of iterations allowed " + "when searching for transitive " + "phis")); + static cl::opt MaxIncomingPhiValues( - "funcspec-max-incoming-phi-values", cl::init(4), cl::Hidden, cl::desc( - "The maximum number of incoming values a PHI node can have to be " - "considered during the specialization bonus estimation")); + "funcspec-max-incoming-phi-values", cl::init(8), cl::Hidden, + cl::desc("The maximum number of incoming values a PHI node can have to be " + "considered during the specialization bonus estimation")); static cl::opt MaxBlockPredecessors( "funcspec-max-block-predecessors", cl::init(2), cl::Hidden, cl::desc( @@ -64,9 +71,9 @@ static cl::opt MinCodeSizeSavings( "much percent of the original function size")); static cl::opt MinLatencySavings( - "funcspec-min-latency-savings", cl::init(70), cl::Hidden, cl::desc( - "Reject specializations whose latency savings are less than this" - "much percent of the original function size")); + "funcspec-min-latency-savings", cl::init(40), cl::Hidden, + cl::desc("Reject specializations whose latency savings are less than this " + "much percent of the original function size")); static cl::opt MinInliningBonus( "funcspec-min-inlining-bonus", cl::init(300), 
cl::Hidden, cl::desc( @@ -262,29 +269,102 @@ Cost InstCostVisitor::estimateBranchInst(BranchInst &I) { return estimateBasicBlocks(WorkList); } +bool InstCostVisitor::discoverTransitivelyIncomingValues( + Constant *Const, PHINode *Root, DenseSet &TransitivePHIs) { + + SmallVector WorkList; + WorkList.push_back(Root); + unsigned Iter = 0; + + while (!WorkList.empty()) { + PHINode *PN = WorkList.pop_back_val(); + + if (++Iter > MaxDiscoveryIterations || + PN->getNumIncomingValues() > MaxIncomingPhiValues) + return false; + + if (!TransitivePHIs.insert(PN).second) + continue; + + for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { + Value *V = PN->getIncomingValue(I); + + // Disregard self-references and dead incoming values. + if (auto *Inst = dyn_cast(V)) + if (Inst == PN || DeadBlocks.contains(PN->getIncomingBlock(I))) + continue; + + if (Constant *C = findConstantFor(V, KnownConstants)) { + // Not all incoming values are the same constant. Bail immediately. + if (C != Const) + return false; + continue; + } + + if (auto *Phi = dyn_cast(V)) { + WorkList.push_back(Phi); + continue; + } + + // We can't reason about anything else. + return false; + } + } + return true; +} + Constant *InstCostVisitor::visitPHINode(PHINode &I) { if (I.getNumIncomingValues() > MaxIncomingPhiValues) return nullptr; bool Inserted = VisitedPHIs.insert(&I).second; Constant *Const = nullptr; + bool HaveSeenIncomingPHI = false; for (unsigned Idx = 0, E = I.getNumIncomingValues(); Idx != E; ++Idx) { Value *V = I.getIncomingValue(Idx); + + // Disregard self-references and dead incoming values. if (auto *Inst = dyn_cast(V)) if (Inst == &I || DeadBlocks.contains(I.getIncomingBlock(Idx))) continue; - Constant *C = findConstantFor(V, KnownConstants); - if (!C) { - if (Inserted) - PendingPHIs.push_back(&I); - return nullptr; + + if (Constant *C = findConstantFor(V, KnownConstants)) { + if (!Const) + Const = C; + // Not all incoming values are the same constant. Bail immediately. 
+ if (C != Const) + return nullptr; + continue; } - if (!Const) - Const = C; - else if (C != Const) + + if (Inserted) { + // First time we are seeing this phi. We will retry later, after + // all the constant arguments have been propagated. Bail for now. + PendingPHIs.push_back(&I); return nullptr; + } + + if (isa(V)) { + // Perhaps it is a Transitive Phi. We will confirm later. + HaveSeenIncomingPHI = true; + continue; + } + + // We can't reason about anything else. + return nullptr; } + + if (!Const) + return nullptr; + + if (!HaveSeenIncomingPHI) + return Const; + + DenseSet TransitivePHIs; + if (!discoverTransitivelyIncomingValues(Const, &I, TransitivePHIs)) + return nullptr; + return Const; } diff --git a/llvm/test/Transforms/FunctionSpecialization/discover-transitive-phis.ll b/llvm/test/Transforms/FunctionSpecialization/discover-transitive-phis.ll new file mode 100644 index 00000000000000..b4c24715037bca --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/discover-transitive-phis.ll @@ -0,0 +1,87 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; +; RUN: opt -passes="ipsccp" -funcspec-min-function-size=20 -funcspec-for-literal-constant -S < %s | FileCheck %s --check-prefix=FUNCSPEC +; RUN: opt -passes="ipsccp" -funcspec-min-function-size=20 -funcspec-for-literal-constant -funcspec-max-discovery-iterations=16 -S < %s | FileCheck %s --check-prefix=NOFUNCSPEC + +define i64 @bar(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10) { +; FUNCSPEC-LABEL: define i64 @bar( +; FUNCSPEC-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i1 [[C5:%.*]], i1 [[C6:%.*]], i1 [[C7:%.*]], i1 [[C8:%.*]], i1 [[C9:%.*]], i1 [[C10:%.*]]) { +; FUNCSPEC-NEXT: entry: +; FUNCSPEC-NEXT: [[F1:%.*]] = call i64 @foo.specialized.1(i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0:![0-9]+]] +; FUNCSPEC-NEXT: [[F2:%.*]] = call 
i64 @foo.specialized.2(i64 4, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG1:![0-9]+]] +; FUNCSPEC-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[F1]], [[F2]] +; FUNCSPEC-NEXT: ret i64 [[ADD]] +; +; NOFUNCSPEC-LABEL: define i64 @bar( +; NOFUNCSPEC-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i1 [[C5:%.*]], i1 [[C6:%.*]], i1 [[C7:%.*]], i1 [[C8:%.*]], i1 [[C9:%.*]], i1 [[C10:%.*]]) { +; NOFUNCSPEC-NEXT: entry: +; NOFUNCSPEC-NEXT: [[F1:%.*]] = call i64 @foo(i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0:![0-9]+]] +; NOFUNCSPEC-NEXT: [[F2:%.*]] = call i64 @foo(i64 4, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0]] +; NOFUNCSPEC-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[F1]], [[F2]] +; NOFUNCSPEC-NEXT: ret i64 [[ADD]] +; +entry: + %f1 = call i64 @foo(i64 3, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10) + %f2 = call i64 @foo(i64 4, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10) + %add = add i64 %f1, %f2 + ret i64 %add +} + +define internal i64 @foo(i64 %n, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10) { +entry: + br i1 %c1, label %l1, label %l9 + +l1: + %phi1 = phi i64 [ %n, %entry ], [ %phi2, %l2 ] + %add = add i64 %phi1, 1 + %div = sdiv i64 %add, 2 + br i1 %c2, label %l1_5, label %exit + +l1_5: + br i1 %c3, label %l1_75, label %l6 + +l1_75: + br i1 %c4, label %l2, label %l3 + +l2: + %phi2 = phi i64 [ %phi1, %l1_75 ], [ %phi3, %l3 ] + br label %l1 + +l3: + %phi3 = phi i64 [ %phi1, %l1_75 ], [ %phi4, %l4 ] + br label %l2 + +l4: + %phi4 = phi i64 [ %phi5, %l5 ], [ %phi6, %l6 ] + br i1 %c5, label %l3, label %l6 + +l5: + %phi5 = phi i64 [ %phi6, %l6_5 ], [ %phi7, %l7 ] + br label %l4 + +l6: + %phi6 = phi i64 [ %phi4, %l4 
], [ %phi1, %l1_5 ] + br i1 %c6, label %l4, label %l6_5 + +l6_5: + br i1 %c7, label %l5, label %l8 + +l7: + %phi7 = phi i64 [ %phi9, %l9 ], [ %phi8, %l8 ] + br i1 %c8, label %l5, label %l8 + +l8: + %phi8 = phi i64 [ %phi6, %l6_5 ], [ %phi7, %l7 ] + br i1 %c9, label %l7, label %l9 + +l9: + %phi9 = phi i64 [ %n, %entry ], [ %phi8, %l8 ] + %sub = sub i64 %phi9, 1 + %mul = mul i64 %sub, 2 + br i1 %c10, label %l7, label %exit + +exit: + %res = phi i64 [ %div, %l1 ], [ %mul, %l9] + ret i64 %res +} + diff --git a/llvm/test/Transforms/FunctionSpecialization/phi-nodes-can-constfold.ll b/llvm/test/Transforms/FunctionSpecialization/phi-nodes-can-constfold.ll new file mode 100644 index 00000000000000..5865b5492e1f54 --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/phi-nodes-can-constfold.ll @@ -0,0 +1,42 @@ +; RUN: opt -passes="ipsccp" -funcspec-min-function-size=10 -funcspec-for-literal-constant -S < %s | FileCheck %s + +define i64 @bar(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i64 %x1) { +; CHECK-LABEL: define i64 @bar( +; CHECK-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i64 [[X1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[F1:%.*]] = call i64 @foo.specialized.1(i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]]) +; CHECK-NEXT: [[F2:%.*]] = call i64 @foo(i64 [[X1]], i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]]) +; CHECK-NEXT: [[ADD:%.*]] = add i64 [[F1]], [[F2]] +; CHECK-NEXT: ret i64 [[ADD]] +; +entry: + %f1 = call i64 @foo(i64 3, i1 %c1, i1 %c2, i1 %c3, i1 %c4) + %f2 = call i64 @foo(i64 %x1, i1 %c1, i1 %c2, i1 %c3, i1 %c4) + %add = add i64 %f1, %f2 + ret i64 %add +} + +define internal i64 @foo(i64 %n, i1 %c1, i1 %c2, i1 %c3, i1 %c4) { +entry: + br label %l0 + +l1: + %phi1 = phi i64 [ %phi0, %l0 ], [ %phi2, %l2 ] + %add = add i64 %phi1, 1 + %div = sdiv i64 %add, 2 + br i1 %c2, label %l2, label %exit + +l2: + %phi2 = phi i64 [ %phi0, %l0 ], [ %phi1, %l1 ] + %sub = sub i64 %phi2, 1 + %mul = mul i64 %sub, 2 + br i1 %c4, label %l1, label %exit + +l0: 
+ %phi0 = phi i64 [ %n, %entry ] + br i1 %c1, label %l1, label %l2 + +exit: + %res = phi i64 [ %div, %l1 ], [ %mul, %l2] + ret i64 %res +} diff --git a/llvm/test/Transforms/FunctionSpecialization/phi-nodes-non-constfoldable.ll b/llvm/test/Transforms/FunctionSpecialization/phi-nodes-non-constfoldable.ll new file mode 100644 index 00000000000000..11b71d6667b985 --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/phi-nodes-non-constfoldable.ll @@ -0,0 +1,54 @@ +; RUN: opt -passes="ipsccp" -funcspec-min-function-size=10 -funcspec-for-literal-constant -S < %s | FileCheck %s + +define i64 @bar(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i64 %x1) { +; CHECK-LABEL: define i64 @bar( +; CHECK-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i64 [[X1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[F1:%.*]] = call i64 @foo(i64 3, i64 4, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]]) +; CHECK-NEXT: [[F2:%.*]] = call i64 @foo(i64 4, i64 [[X1]], i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]]) +; CHECK-NEXT: [[F3:%.*]] = call i64 @foo.specialized.1(i64 3, i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]]) +; CHECK-NEXT: [[ADD:%.*]] = add i64 [[F1]], [[F2]] +; CHECK-NEXT: [[ADD2:%.*]] = add i64 [[ADD]], [[F3]] +; CHECK-NEXT: ret i64 [[ADD2]] +; +entry: + %f1 = call i64 @foo(i64 3, i64 4, i1 %c1, i1 %c2, i1 %c3, i1 %c4) + %f2 = call i64 @foo(i64 4, i64 %x1, i1 %c1, i1 %c2, i1 %c3, i1 %c4) + %f3 = call i64 @foo(i64 3, i64 3, i1 %c1, i1 %c2, i1 %c3, i1 %c4) + %add = add i64 %f1, %f2 + %add2 = add i64 %add, %f3 + ret i64 %add2 +} + +define internal i64 @foo(i64 %n, i64 %m, i1 %c1, i1 %c2, i1 %c3, i1 %c4) { +entry: + br i1 %c1, label %l1, label %l4 + +l1: + %phi1 = phi i64 [ %n, %entry ], [ %phi2, %l2 ] + %add = add i64 %phi1, 1 + %div = sdiv i64 %add, 2 + br i1 %c2, label %l1_5, label %exit + +l1_5: + br i1 %c3, label %l2, label %l3 + +l2: + %phi2 = phi i64 [ %phi1, %l1_5 ], [ %phi3, %l3 ] + br label %l1 + +l3: + %phi3 = phi i64 [ %phi1, %l1_5 ], [ %m, %l4 ] + br i1 %c2, 
label %l4, label %l2 + +l4: + %phi4 = phi i64 [ %n, %entry ], [ %phi3, %l3 ] + %sub = sub i64 %phi4, 1 + %mul = mul i64 %sub, 2 + br i1 %c4, label %l3, label %exit + +exit: + %res = phi i64 [ %div, %l1 ], [ %mul, %l4] + ret i64 %res +} +