Skip to content

Commit

Permalink
[X86,SimplifyCFG] Support hoisting load/store with conditional faulti…
Browse files Browse the repository at this point in the history
…ng (Part I) (llvm#96878)

This is simplifycfg part of
llvm#95515

In this PR, we support hoisting load/store with conditional faulting in
`SimplifyCFGOpt::speculativelyExecuteBB` to eliminate conditional
branches.
This is for cases like
```
void test (int a, int *b) {
  if (a)
   *b = a;
}
``` 

In the following patches, we will support the hoist in
`SimplifyCFGOpt::hoistCommonCodeFromSuccessors`.
That is for cases like
```
void test (int a, int *c, int *d) {
  if (a)
   *c = a;
  else 
   *d = a;
}
```
  • Loading branch information
KanRobert authored Aug 29, 2024
1 parent 438ad9f commit 87c86aa
Show file tree
Hide file tree
Showing 8 changed files with 913 additions and 14 deletions.
5 changes: 5 additions & 0 deletions llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ struct SimplifyCFGOptions {
bool ConvertSwitchToLookupTable = false;
bool NeedCanonicalLoop = true;
bool HoistCommonInsts = false;
bool HoistLoadsStoresWithCondFaulting = false;
bool SinkCommonInsts = false;
bool SimplifyCondBranch = true;
bool SpeculateBlocks = true;
Expand Down Expand Up @@ -59,6 +60,10 @@ struct SimplifyCFGOptions {
HoistCommonInsts = B;
return *this;
}
SimplifyCFGOptions &hoistLoadsStoresWithCondFaulting(bool B) {
HoistLoadsStoresWithCondFaulting = B;
return *this;
}
SimplifyCFGOptions &sinkCommonInsts(bool B) {
SinkCommonInsts = B;
return *this;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Passes/PassBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -848,6 +848,8 @@ Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) {
Result.needCanonicalLoops(Enable);
} else if (ParamName == "hoist-common-insts") {
Result.hoistCommonInsts(Enable);
} else if (ParamName == "hoist-loads-stores-with-cond-faulting") {
Result.hoistLoadsStoresWithCondFaulting(Enable);
} else if (ParamName == "sink-common-insts") {
Result.sinkCommonInsts(Enable);
} else if (ParamName == "speculate-unpredictables") {
Expand Down
8 changes: 5 additions & 3 deletions llvm/lib/Passes/PassBuilderPipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1534,9 +1534,11 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,

// LoopSink (and other loop passes since the last simplifyCFG) might have
// resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
.convertSwitchRangeToICmp(true)
.speculateUnpredictables(true)));
OptimizePM.addPass(
SimplifyCFGPass(SimplifyCFGOptions()
.convertSwitchRangeToICmp(true)
.speculateUnpredictables(true)
.hoistLoadsStoresWithCondFaulting(true)));

// Add the core optimizing pipeline.
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ static cl::opt<bool> UserHoistCommonInsts(
"hoist-common-insts", cl::Hidden, cl::init(false),
cl::desc("hoist common instructions (default = false)"));

static cl::opt<bool> UserHoistLoadsStoresWithCondFaulting(
"hoist-loads-stores-with-cond-faulting", cl::Hidden, cl::init(false),
cl::desc("Hoist loads/stores if the target supports conditional faulting "
"(default = false)"));

static cl::opt<bool> UserSinkCommonInsts(
"sink-common-insts", cl::Hidden, cl::init(false),
cl::desc("Sink common instructions (default = false)"));
Expand Down Expand Up @@ -326,6 +331,9 @@ static void applyCommandLineOverridesToOptions(SimplifyCFGOptions &Options) {
Options.NeedCanonicalLoop = UserKeepLoops;
if (UserHoistCommonInsts.getNumOccurrences())
Options.HoistCommonInsts = UserHoistCommonInsts;
if (UserHoistLoadsStoresWithCondFaulting.getNumOccurrences())
Options.HoistLoadsStoresWithCondFaulting =
UserHoistLoadsStoresWithCondFaulting;
if (UserSinkCommonInsts.getNumOccurrences())
Options.SinkCommonInsts = UserSinkCommonInsts;
if (UserSpeculateUnpredictables.getNumOccurrences())
Expand Down Expand Up @@ -354,6 +362,8 @@ void SimplifyCFGPass::printPipeline(
<< "switch-to-lookup;";
OS << (Options.NeedCanonicalLoop ? "" : "no-") << "keep-loops;";
OS << (Options.HoistCommonInsts ? "" : "no-") << "hoist-common-insts;";
OS << (Options.HoistLoadsStoresWithCondFaulting ? "" : "no-")
<< "hoist-loads-stores-with-cond-faulting;";
OS << (Options.SinkCommonInsts ? "" : "no-") << "sink-common-insts;";
OS << (Options.SpeculateBlocks ? "" : "no-") << "speculate-blocks;";
OS << (Options.SimplifyCondBranch ? "" : "no-") << "simplify-cond-branch;";
Expand Down
164 changes: 155 additions & 9 deletions llvm/lib/Transforms/Utils/SimplifyCFG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,18 @@ static cl::opt<bool>
HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
cl::desc("Hoist common instructions up to the parent block"));

static cl::opt<bool> HoistLoadsStoresWithCondFaulting(
"simplifycfg-hoist-loads-stores-with-cond-faulting", cl::Hidden,
cl::init(true),
cl::desc("Hoist loads/stores if the target supports "
"conditional faulting"));

static cl::opt<unsigned> HoistLoadsStoresWithCondFaultingThreshold(
"hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
cl::desc("Control the maximal conditonal load/store that we are willing "
"to speculatively execute to eliminate conditional branch "
"(default = 6)"));

static cl::opt<unsigned>
HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
cl::init(20),
Expand Down Expand Up @@ -2986,6 +2998,25 @@ static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert,
return BIEndProb < Likely;
}

static bool isSafeCheapLoadStore(const Instruction *I,
const TargetTransformInfo &TTI) {
// Not handle volatile or atomic.
if (auto *L = dyn_cast<LoadInst>(I)) {
if (!L->isSimple())
return false;
} else if (auto *S = dyn_cast<StoreInst>(I)) {
if (!S->isSimple())
return false;
} else
return false;

// llvm.masked.load/store use i32 for alignment while load/store use i64.
// That's why we have the alignment limitation.
// FIXME: Update the prototype of the intrinsics?
return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I)) &&
getLoadStoreAlignment(I) < Value::MaximumAlignment;
}

/// Speculate a conditional basic block flattening the CFG.
///
/// Note that this is a very risky transform currently. Speculating
Expand Down Expand Up @@ -3060,6 +3091,9 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;

unsigned SpeculatedInstructions = 0;
bool HoistLoadsStores = HoistLoadsStoresWithCondFaulting &&
Options.HoistLoadsStoresWithCondFaulting;
SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
Value *SpeculatedStoreValue = nullptr;
StoreInst *SpeculatedStore = nullptr;
EphemeralValueTracker EphTracker;
Expand Down Expand Up @@ -3088,22 +3122,33 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,

// Only speculatively execute a single instruction (not counting the
// terminator) for now.
++SpeculatedInstructions;
bool IsSafeCheapLoadStore = HoistLoadsStores &&
isSafeCheapLoadStore(&I, TTI) &&
SpeculatedConditionalLoadsStores.size() <
HoistLoadsStoresWithCondFaultingThreshold;
// Not count load/store into cost if target supports conditional faulting
// b/c it's cheap to speculate it.
if (IsSafeCheapLoadStore)
SpeculatedConditionalLoadsStores.push_back(&I);
else
++SpeculatedInstructions;

if (SpeculatedInstructions > 1)
return false;

// Don't hoist the instruction if it's unsafe or expensive.
if (!isSafeToSpeculativelyExecute(&I) &&
!(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(
&I, BB, ThenBB, EndBB))))
if (!IsSafeCheapLoadStore && !isSafeToSpeculativelyExecute(&I) &&
!(HoistCondStores && !SpeculatedStoreValue &&
(SpeculatedStoreValue =
isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
return false;
if (!SpeculatedStoreValue &&
if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
computeSpeculationCost(&I, TTI) >
PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
return false;

// Store the store speculation candidate.
if (SpeculatedStoreValue)
if (!SpeculatedStore && SpeculatedStoreValue)
SpeculatedStore = cast<StoreInst>(&I);

// Do not hoist the instruction if any of its operands are defined but not
Expand All @@ -3130,11 +3175,11 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,

// Check that we can insert the selects and that it's not too expensive to do
// so.
bool Convert = SpeculatedStore != nullptr;
bool Convert =
SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
InstructionCost Cost = 0;
Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
SpeculatedInstructions,
Cost, TTI);
SpeculatedInstructions, Cost, TTI);
if (!Convert || Cost > Budget)
return false;

Expand Down Expand Up @@ -3222,6 +3267,107 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
std::prev(ThenBB->end()));

// If the target supports conditional faulting,
// we look for the following pattern:
// \code
// BB:
// ...
// %cond = icmp ult %x, %y
// br i1 %cond, label %TrueBB, label %FalseBB
// FalseBB:
// store i32 1, ptr %q, align 4
// ...
// TrueBB:
// %maskedloadstore = load i32, ptr %b, align 4
// store i32 %maskedloadstore, ptr %p, align 4
// ...
// \endcode
//
// and transform it into:
//
// \code
// BB:
// ...
// %cond = icmp ult %x, %y
// %maskedloadstore = cload i32, ptr %b, %cond
// cstore i32 %maskedloadstore, ptr %p, %cond
// cstore i32 1, ptr %q, ~%cond
// br i1 %cond, label %TrueBB, label %FalseBB
// FalseBB:
// ...
// TrueBB:
// ...
// \endcode
//
// where cload/cstore are represented by llvm.masked.load/store intrinsics,
// e.g.
//
// \code
// %vcond = bitcast i1 %cond to <1 x i1>
// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
// %maskedloadstore = bitcast <1 x i32> %v0 to i32
// call void @llvm.masked.store.v1i32.p0
// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
// %cond.not = xor i1 %cond, true
// %vcond.not = bitcast i1 %cond.not to <1 x i>
// call void @llvm.masked.store.v1i32.p0
// (<1 x i32> <i32 1>, ptr %q, i32 4, <1x i1> %vcond.not)
// \endcode
//
// So we need to turn hoisted load/store into cload/cstore.
auto &Context = BI->getParent()->getContext();
auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
auto *Cond = BI->getOperand(0);
Value *Mask = nullptr;
// Construct the condition if needed.
if (!SpeculatedConditionalLoadsStores.empty()) {
IRBuilder<> Builder(SpeculatedConditionalLoadsStores.back());
Mask = Builder.CreateBitCast(
Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
VCondTy);
}
for (auto *I : SpeculatedConditionalLoadsStores) {
IRBuilder<> Builder(I);
// We currently assume conditional faulting load/store is supported for
// scalar types only when creating new instructions. This can be easily
// extended for vector types in the future.
assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
auto *Op0 = I->getOperand(0);
Instruction *MaskedLoadStore = nullptr;
if (auto *LI = dyn_cast<LoadInst>(I)) {
// Handle Load.
auto *Ty = I->getType();
MaskedLoadStore = Builder.CreateMaskedLoad(FixedVectorType::get(Ty, 1),
Op0, LI->getAlign(), Mask);
I->replaceAllUsesWith(Builder.CreateBitCast(MaskedLoadStore, Ty));
} else {
// Handle Store.
auto *StoredVal =
Builder.CreateBitCast(Op0, FixedVectorType::get(Op0->getType(), 1));
MaskedLoadStore = Builder.CreateMaskedStore(
StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
}
// For non-debug metadata, only !annotation, !range, !nonnull and !align are
// kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
//
// !nonnull, !align : Not support pointer type, no need to keep.
// !range: Load type is changed from scalar to vector, but the metadata on
// vector specifies a per-element range, so the semantics stay the
// same. Keep it.
// !annotation: Not impact semantics. Keep it.
I->dropUBImplyingAttrsAndUnknownMetadata(
{LLVMContext::MD_range, LLVMContext::MD_annotation});
// FIXME: DIAssignID is not supported for masked store yet.
// (Verifier::visitDIAssignIDMetadata)
at::deleteAssignmentMarkers(I);
I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
return Node->getMetadataID() == Metadata::DIAssignIDKind;
});
MaskedLoadStore->copyMetadata(*I);
I->eraseFromParent();
}

// Insert selects and rewrite the PHI operands.
IRBuilder<NoFolder> Builder(BI);
for (PHINode &PN : EndBB->phis()) {
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Other/new-pm-print-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@
; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(print<stack-lifetime><may>,print<stack-lifetime><must>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-17
; CHECK-17: function(print<stack-lifetime><may>,print<stack-lifetime><must>)

; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-18
; CHECK-18: function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;no-switch-range-to-icmp;switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)
; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;switch-to-lookup;keep-loops;hoist-common-insts;hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-18
; CHECK-18: function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;no-switch-range-to-icmp;switch-to-lookup;keep-loops;hoist-common-insts;hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)

; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only>,loop-vectorize<interleave-forced-only;vectorize-forced-only>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-19
; CHECK-19: function(loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>,loop-vectorize<interleave-forced-only;vectorize-forced-only;>)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -O1 -S | FileCheck %s

;; Test masked.load/store.v1* is generated in simplifycfg and not falls back to branch+load/store in following passes.
define void @basic(i1 %cond, ptr %b, ptr %p, ptr %q) {
; CHECK-LABEL: @basic(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[B:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> poison)
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to i64
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 2, <1 x i1> [[TMP0]])
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP8]], ptr [[P]], i32 4, <1 x i1> [[TMP0]])
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP6]] to <1 x i64>
; CHECK-NEXT: call void @llvm.masked.store.v1i64.p0(<1 x i64> [[TMP9]], ptr [[Q]], i32 8, <1 x i1> [[TMP0]])
; CHECK-NEXT: ret void
;
entry:
br i1 %cond, label %if.true, label %if.false

if.false:
br label %if.end

if.true:
%pv = load i16, ptr %p, align 2
%qv = load i32, ptr %q, align 4
%bv = load i64, ptr %b, align 8
store i16 %pv, ptr %b, align 2
store i32 %qv, ptr %p, align 4
store i64 %bv, ptr %q, align 8
br label %if.false

if.end:
ret void
}
Loading

0 comments on commit 87c86aa

Please sign in to comment.