Skip to content

Commit

Permalink
[SimplifyCFG] use profile metadata to refine merging branch conditions
Browse files Browse the repository at this point in the history
This is one step towards solving:
https://llvm.org/PR49336

In that example, we disregard the recommended usage of builtin_expect,
so an expensive (unpredictable) branch is folded into another branch
that is guarding it.
Here, we read the profile metadata to see if the 1st (predecessor)
condition is likely to cause execution to bypass the 2nd (successor)
condition before merging conditions by using logic ops.

Differential Revision: https://reviews.llvm.org/D98898
  • Loading branch information
rotateright committed Mar 22, 2021
1 parent 424bf5d commit 27ae17a
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 44 deletions.
59 changes: 41 additions & 18 deletions llvm/lib/Transforms/Utils/SimplifyCFG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
Expand Down Expand Up @@ -2840,38 +2841,60 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
}
}

// Determine if the two branches share a common destination,
// and deduce a glue that we need to use to join branch's conditions
// to arrive at the common destination.
/// Determine if the two branches share a common destination and deduce a glue
/// that joins branch's conditions to arrive at the common destination if that
/// would be profitable.
static Optional<std::pair<Instruction::BinaryOps, bool>>
CheckIfCondBranchesShareCommonDestination(BranchInst *BI, BranchInst *PBI) {
shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI,
const TargetTransformInfo *TTI) {
assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
"Both blocks must end with a conditional branches.");
assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
"PredBB must be a predecessor of BB.");

if (PBI->getSuccessor(0) == BI->getSuccessor(0))
return {{Instruction::Or, false}};
else if (PBI->getSuccessor(1) == BI->getSuccessor(1))
return {{Instruction::And, false}};
else if (PBI->getSuccessor(0) == BI->getSuccessor(1))
return {{Instruction::And, true}};
else if (PBI->getSuccessor(1) == BI->getSuccessor(0))
return {{Instruction::Or, true}};
// We have the potential to fold the conditions together, but if the
// predecessor branch is predictable, we may not want to merge them.
uint64_t PTWeight, PFWeight;
BranchProbability PBITrueProb, Likely;
if (PBI->extractProfMetadata(PTWeight, PFWeight) &&
(PTWeight + PFWeight) != 0) {
PBITrueProb =
BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
Likely = TTI->getPredictableBranchThreshold();
}

if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
// Speculate the 2nd condition unless the 1st is probably true.
if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
return {{Instruction::Or, false}};
} else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
// Speculate the 2nd condition unless the 1st is probably false.
if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
return {{Instruction::And, false}};
} else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
// Speculate the 2nd condition unless the 1st is probably true.
if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
return {{Instruction::And, true}};
} else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
// Speculate the 2nd condition unless the 1st is probably false.
if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
return {{Instruction::Or, true}};
}
return None;
}

static bool PerformBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
DomTreeUpdater *DTU,
MemorySSAUpdater *MSSAU) {
MemorySSAUpdater *MSSAU,
const TargetTransformInfo *TTI) {
BasicBlock *BB = BI->getParent();
BasicBlock *PredBlock = PBI->getParent();

// Determine if the two branches share a common destination.
Instruction::BinaryOps Opc;
bool InvertPredCond;
std::tie(Opc, InvertPredCond) =
*CheckIfCondBranchesShareCommonDestination(BI, PBI);
*shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI);

LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);

Expand Down Expand Up @@ -3059,8 +3082,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
// Determine if the two branches share a common destination.
Instruction::BinaryOps Opc;
bool InvertPredCond;
if (auto Recepie = CheckIfCondBranchesShareCommonDestination(BI, PBI))
std::tie(Opc, InvertPredCond) = *Recepie;
if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
std::tie(Opc, InvertPredCond) = *Recipe;
else
continue;

Expand All @@ -3077,7 +3100,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
continue;
}

return PerformBranchToCommonDestFolding(BI, PBI, DTU, MSSAU);
return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
}
return Changed;
}
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/Transforms/PGOProfile/chr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1277,11 +1277,12 @@ define i32 @test_chr_14(i32* %i, i32* %j, i32 %sum0, i1 %pred, i32 %z) !prof !14
; CHECK-LABEL: @test_chr_14(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
; CHECK-NEXT: [[V1:%.*]] = icmp ne i32 [[Z:%.*]], 1
; CHECK-NEXT: [[V1:%.*]] = icmp eq i32 [[Z:%.*]], 1
; CHECK-NEXT: br i1 [[V1]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
; CHECK: entry.split.nonchr:
; CHECK-NEXT: [[V0:%.*]] = icmp eq i32 [[Z]], 0
; CHECK-NEXT: [[V3_NONCHR:%.*]] = and i1 [[V0]], [[PRED:%.*]]
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[V1]], [[V3_NONCHR]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[BB0_NONCHR:%.*]], label [[BB1:%.*]], !prof !19
; CHECK-NEXT: br i1 [[V3_NONCHR]], label [[BB0_NONCHR:%.*]], label [[BB1]], !prof !16
; CHECK: bb0.nonchr:
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: br label [[BB1]]
Expand Down Expand Up @@ -1912,7 +1913,7 @@ define i32 @test_chr_21(i64 %i, i64 %k, i64 %j) !prof !14 {
; CHECK-NEXT: switch i64 [[I]], label [[BB2:%.*]] [
; CHECK-NEXT: i64 2, label [[BB3_NONCHR2:%.*]]
; CHECK-NEXT: i64 86, label [[BB2_NONCHR1:%.*]]
; CHECK-NEXT: ], !prof !20
; CHECK-NEXT: ], !prof !19
; CHECK: bb2:
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: call void @foo()
Expand Down Expand Up @@ -2489,14 +2490,14 @@ define void @test_chr_24(i32* %i) !prof !14 {
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP2]], label [[BB1:%.*]], label [[BB0:%.*]], !prof !21
; CHECK-NEXT: br i1 [[TMP2]], label [[BB1:%.*]], label [[BB0:%.*]], !prof !20
; CHECK: bb0:
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: br label [[BB1]]
; CHECK: bb1:
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 2
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
; CHECK-NEXT: br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2:%.*]], !prof !21
; CHECK-NEXT: br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2:%.*]], !prof !20
; CHECK: bb2:
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: br label [[BB3]]
Expand Down Expand Up @@ -2550,4 +2551,3 @@ bb3:
; CHECK: !16 = !{!"branch_weights", i32 0, i32 1}
; CHECK: !17 = !{!"branch_weights", i32 1, i32 1}
; CHECK: !18 = !{!"branch_weights", i32 1, i32 0}
; CHECK: !19 = !{!"branch_weights", i32 0, i32 1000}
60 changes: 41 additions & 19 deletions llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll
Original file line number Diff line number Diff line change
Expand Up @@ -636,16 +636,17 @@ exit:
ret i32 %outval
}

; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata.
; Merging the icmps with logic-op defeats the purpose of the metadata.
; We can't tell which condition is expensive if they are combined.

define void @or_icmps_harmful(i32 %x, i32 %y, i8* %p) {
; CHECK-LABEL: @or_icmps_harmful(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1
; CHECK-NEXT: br i1 [[EXPECTED_TRUE]], label [[EXIT:%.*]], label [[RARE:%.*]], !prof !19
; CHECK: rare:
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[EXPECTED_TRUE]], [[EXPENSIVE]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !19
; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[EXIT]], label [[FALSE:%.*]]
; CHECK: false:
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
Expand All @@ -668,16 +669,17 @@ exit:
ret void
}

; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata.
; Merging the icmps with logic-op defeats the purpose of the metadata.
; We can't tell which condition is expensive if they are combined.

define void @or_icmps_harmful_inverted(i32 %x, i32 %y, i8* %p) {
; CHECK-LABEL: @or_icmps_harmful_inverted(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sle i32 [[X:%.*]], -1
; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1
; CHECK-NEXT: br i1 [[EXPECTED_FALSE]], label [[RARE:%.*]], label [[EXIT:%.*]], !prof !20
; CHECK: rare:
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[EXPECTED_FALSE]], [[EXPENSIVE]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !19
; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[EXIT]], label [[FALSE:%.*]]
; CHECK: false:
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
Expand All @@ -700,15 +702,16 @@ exit:
ret void
}

; The probability threshold is set by a builtin_expect setting.
; The probability threshold is determined by a TTI setting.
; In this example, we are just short of strongly expected, so speculate.

define void @or_icmps_not_that_harmful(i32 %x, i32 %y, i8* %p) {
; CHECK-LABEL: @or_icmps_not_that_harmful(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[EXPECTED_TRUE]], [[EXPENSIVE]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !20
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !21
; CHECK: false:
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
Expand All @@ -731,13 +734,16 @@ exit:
ret void
}

; The probability threshold is determined by a TTI setting.
; In this example, we are just short of strongly expected, so speculate.

define void @or_icmps_not_that_harmful_inverted(i32 %x, i32 %y, i8* %p) {
; CHECK-LABEL: @or_icmps_not_that_harmful_inverted(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[EXPECTED_TRUE]], [[EXPENSIVE]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !21
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !22
; CHECK: false:
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
Expand All @@ -760,13 +766,15 @@ exit:
ret void
}

; The 1st cmp is probably true, so speculating the 2nd is probably a win.

define void @or_icmps_useful(i32 %x, i32 %y, i8* %p) {
; CHECK-LABEL: @or_icmps_useful(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sle i32 [[X:%.*]], -1
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[EXPECTED_TRUE]], [[EXPENSIVE]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !22
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !23
; CHECK: false:
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
Expand All @@ -789,13 +797,15 @@ exit:
ret void
}

; The 1st cmp is probably false, so speculating the 2nd is probably a win.

define void @or_icmps_useful_inverted(i32 %x, i32 %y, i8* %p) {
; CHECK-LABEL: @or_icmps_useful_inverted(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[EXPECTED_FALSE]], [[EXPENSIVE]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !22
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !23
; CHECK: false:
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
Expand Down Expand Up @@ -849,16 +859,17 @@ exit:
ret void
}

; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata.
; Merging the icmps with logic-op defeats the purpose of the metadata.
; We can't tell which condition is expensive if they are combined.

define void @and_icmps_harmful(i32 %x, i32 %y, i8* %p) {
; CHECK-LABEL: @and_icmps_harmful(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1
; CHECK-NEXT: br i1 [[EXPECTED_FALSE]], label [[RARE:%.*]], label [[EXIT:%.*]], !prof !20
; CHECK: rare:
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[EXPECTED_FALSE]], [[EXPENSIVE]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof !23
; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[FALSE:%.*]], label [[EXIT]]
; CHECK: false:
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
Expand All @@ -881,16 +892,17 @@ exit:
ret void
}

; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata.
; Merging the icmps with logic-op defeats the purpose of the metadata.
; We can't tell which condition is expensive if they are combined.

define void @and_icmps_harmful_inverted(i32 %x, i32 %y, i8* %p) {
; CHECK-LABEL: @and_icmps_harmful_inverted(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sle i32 [[X:%.*]], -1
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1
; CHECK-NEXT: br i1 [[EXPECTED_TRUE]], label [[EXIT:%.*]], label [[RARE:%.*]], !prof !19
; CHECK: rare:
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[EXPECTED_TRUE]], [[EXPENSIVE]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof !23
; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[FALSE:%.*]], label [[EXIT]]
; CHECK: false:
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
; CHECK-NEXT: br label [[EXIT]]
Expand All @@ -913,6 +925,9 @@ exit:
ret void
}

; The probability threshold is determined by a TTI setting.
; In this example, we are just short of strongly expected, so speculate.

define void @and_icmps_not_that_harmful(i32 %x, i32 %y, i8* %p) {
; CHECK-LABEL: @and_icmps_not_that_harmful(
; CHECK-NEXT: entry:
Expand Down Expand Up @@ -942,6 +957,9 @@ exit:
ret void
}

; The probability threshold is determined by a TTI setting.
; In this example, we are just short of strongly expected, so speculate.

define void @and_icmps_not_that_harmful_inverted(i32 %x, i32 %y, i8* %p) {
; CHECK-LABEL: @and_icmps_not_that_harmful_inverted(
; CHECK-NEXT: entry:
Expand Down Expand Up @@ -971,6 +989,8 @@ exit:
ret void
}

; The 1st cmp is probably true, so speculating the 2nd is probably a win.

define void @and_icmps_useful(i32 %x, i32 %y, i8* %p) {
; CHECK-LABEL: @and_icmps_useful(
; CHECK-NEXT: entry:
Expand Down Expand Up @@ -1000,6 +1020,8 @@ exit:
ret void
}

; The 1st cmp is probably false, so speculating the 2nd is probably a win.

define void @and_icmps_useful_inverted(i32 %x, i32 %y, i8* %p) {
; CHECK-LABEL: @and_icmps_useful_inverted(
; CHECK-NEXT: entry:
Expand Down

0 comments on commit 27ae17a

Please sign in to comment.