diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
index 2ea9d64f03cb649..ee3cc950cdb503b 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
@@ -27,6 +27,7 @@ struct SimplifyCFGOptions {
   bool ConvertSwitchToLookupTable = false;
   bool NeedCanonicalLoop = true;
   bool HoistCommonInsts = false;
+  bool HoistLoadsStoresWithCondFaulting = false;
   bool SinkCommonInsts = false;
   bool SimplifyCondBranch = true;
   bool SpeculateBlocks = true;
@@ -59,6 +60,10 @@ struct SimplifyCFGOptions {
     HoistCommonInsts = B;
     return *this;
   }
+  SimplifyCFGOptions &hoistLoadsStoresWithCondFaulting(bool B) {
+    HoistLoadsStoresWithCondFaulting = B;
+    return *this;
+  }
   SimplifyCFGOptions &sinkCommonInsts(bool B) {
     SinkCommonInsts = B;
     return *this;
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 17eed97fd950c98..63173c4abb81918 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -848,6 +848,8 @@ Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) {
       Result.needCanonicalLoops(Enable);
     } else if (ParamName == "hoist-common-insts") {
       Result.hoistCommonInsts(Enable);
+    } else if (ParamName == "hoist-loads-stores-with-cond-faulting") {
+      Result.hoistLoadsStoresWithCondFaulting(Enable);
     } else if (ParamName == "sink-common-insts") {
       Result.sinkCommonInsts(Enable);
     } else if (ParamName == "speculate-unpredictables") {
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 1184123c7710f05..9c3d49cabbd38c8 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1534,9 +1534,11 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
   // LoopSink (and other loop passes since the last simplifyCFG) might have
   // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
-  OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
-                                         .convertSwitchRangeToICmp(true)
-                                         .speculateUnpredictables(true)));
+  OptimizePM.addPass(
+      SimplifyCFGPass(SimplifyCFGOptions()
+                          .convertSwitchRangeToICmp(true)
+                          .speculateUnpredictables(true)
+                          .hoistLoadsStoresWithCondFaulting(true)));
 
   // Add the core optimizing pipeline.
   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 11de37f7a7c1094..daa82a8c368e2b5 100644
--- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -73,6 +73,11 @@ static cl::opt<bool> UserHoistCommonInsts(
     "hoist-common-insts", cl::Hidden, cl::init(false),
     cl::desc("hoist common instructions (default = false)"));
 
+static cl::opt<bool> UserHoistLoadsStoresWithCondFaulting(
+    "hoist-loads-stores-with-cond-faulting", cl::Hidden, cl::init(false),
+    cl::desc("Hoist loads/stores if the target supports conditional faulting "
+             "(default = false)"));
+
 static cl::opt<bool> UserSinkCommonInsts(
     "sink-common-insts", cl::Hidden, cl::init(false),
     cl::desc("Sink common instructions (default = false)"));
@@ -326,6 +331,9 @@ static void applyCommandLineOverridesToOptions(SimplifyCFGOptions &Options) {
     Options.NeedCanonicalLoop = UserKeepLoops;
   if (UserHoistCommonInsts.getNumOccurrences())
     Options.HoistCommonInsts = UserHoistCommonInsts;
+  if (UserHoistLoadsStoresWithCondFaulting.getNumOccurrences())
+    Options.HoistLoadsStoresWithCondFaulting =
+        UserHoistLoadsStoresWithCondFaulting;
   if (UserSinkCommonInsts.getNumOccurrences())
     Options.SinkCommonInsts = UserSinkCommonInsts;
   if (UserSpeculateUnpredictables.getNumOccurrences())
@@ -354,6 +362,8 @@ void SimplifyCFGPass::printPipeline(
        << "switch-to-lookup;";
   OS << (Options.NeedCanonicalLoop ? "" : "no-") << "keep-loops;";
   OS << (Options.HoistCommonInsts ? "" : "no-") << "hoist-common-insts;";
+  OS << (Options.HoistLoadsStoresWithCondFaulting ? "" : "no-")
+     << "hoist-loads-stores-with-cond-faulting;";
   OS << (Options.SinkCommonInsts ? "" : "no-") << "sink-common-insts;";
   OS << (Options.SpeculateBlocks ? "" : "no-") << "speculate-blocks;";
   OS << (Options.SimplifyCondBranch ? "" : "no-") << "simplify-cond-branch;";
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 92e2d189aff6ff2..15de40c7b09962f 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -117,6 +117,18 @@ static cl::opt<bool>
     HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
                 cl::desc("Hoist common instructions up to the parent block"));
 
+static cl::opt<bool> HoistLoadsStoresWithCondFaulting(
+    "simplifycfg-hoist-loads-stores-with-cond-faulting", cl::Hidden,
+    cl::init(true),
+    cl::desc("Hoist loads/stores if the target supports "
+             "conditional faulting"));
+
+static cl::opt<unsigned> HoistLoadsStoresWithCondFaultingThreshold(
+    "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
+    cl::desc("Control the maximal number of conditional loads/stores that we "
+             "are willing to speculatively execute to eliminate a conditional "
+             "branch (default = 6)"));
+
 static cl::opt<unsigned>
     HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
                          cl::init(20),
@@ -2986,6 +2998,25 @@ static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert,
   return BIEndProb < Likely;
 }
 
+static bool isSafeCheapLoadStore(const Instruction *I,
+                                 const TargetTransformInfo &TTI) {
+  // Do not handle volatile or atomic loads/stores.
+  if (auto *L = dyn_cast<LoadInst>(I)) {
+    if (!L->isSimple())
+      return false;
+  } else if (auto *S = dyn_cast<StoreInst>(I)) {
+    if (!S->isSimple())
+      return false;
+  } else
+    return false;
+
+  // llvm.masked.load/store use i32 for alignment while load/store use i64.
+  // That's why we have the alignment limitation.
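+  // For example, the @not_maximum_alignment test added below uses
+  //   store i32 0, ptr %p, align 4294967296
+  // whose alignment (1 << 32, i.e. Value::MaximumAlignment) has no
+  // representation in the masked intrinsics' i32 alignment operand, so such
+  // accesses are rejected here.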
+  // FIXME: Update the prototype of the intrinsics?
+  return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I)) &&
+         getLoadStoreAlignment(I) < Value::MaximumAlignment;
+}
+
 /// Speculate a conditional basic block flattening the CFG.
 ///
 /// Note that this is a very risky transform currently. Speculating
@@ -3060,6 +3091,9 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
   SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
 
   unsigned SpeculatedInstructions = 0;
+  bool HoistLoadsStores = HoistLoadsStoresWithCondFaulting &&
+                          Options.HoistLoadsStoresWithCondFaulting;
+  SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
   Value *SpeculatedStoreValue = nullptr;
   StoreInst *SpeculatedStore = nullptr;
   EphemeralValueTracker EphTracker;
@@ -3088,22 +3122,33 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
 
     // Only speculatively execute a single instruction (not counting the
     // terminator) for now.
-    ++SpeculatedInstructions;
+    bool IsSafeCheapLoadStore = HoistLoadsStores &&
+                                isSafeCheapLoadStore(&I, TTI) &&
+                                SpeculatedConditionalLoadsStores.size() <
+                                    HoistLoadsStoresWithCondFaultingThreshold;
+    // Do not count the load/store against the cost if the target supports
+    // conditional faulting, because it is cheap to speculate.
+    if (IsSafeCheapLoadStore)
+      SpeculatedConditionalLoadsStores.push_back(&I);
+    else
+      ++SpeculatedInstructions;
+
     if (SpeculatedInstructions > 1)
       return false;
 
     // Don't hoist the instruction if it's unsafe or expensive.
-    if (!isSafeToSpeculativelyExecute(&I) &&
-        !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(
-                                  &I, BB, ThenBB, EndBB))))
+    if (!IsSafeCheapLoadStore && !isSafeToSpeculativelyExecute(&I) &&
+        !(HoistCondStores && !SpeculatedStoreValue &&
+          (SpeculatedStoreValue =
+               isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
       return false;
-    if (!SpeculatedStoreValue &&
+    if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
         computeSpeculationCost(&I, TTI) >
             PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
       return false;
 
     // Store the store speculation candidate.
-    if (SpeculatedStoreValue)
+    if (!SpeculatedStore && SpeculatedStoreValue)
       SpeculatedStore = cast<StoreInst>(&I);
 
     // Do not hoist the instruction if any of its operands are defined but not
@@ -3130,11 +3175,11 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
 
   // Check that we can insert the selects and that it's not too expensive to do
   // so.
-  bool Convert = SpeculatedStore != nullptr;
+  bool Convert =
+      SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
   InstructionCost Cost = 0;
   Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
-                                            SpeculatedInstructions,
-                                            Cost, TTI);
+                                            SpeculatedInstructions, Cost, TTI);
   if (!Convert || Cost > Budget)
     return false;
 
@@ -3222,6 +3267,107 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
   BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
              std::prev(ThenBB->end()));
 
+  // If the target supports conditional faulting,
+  // we look for the following pattern:
+  // \code
+  // BB:
+  //   ...
+  //   %cond = icmp ult %x, %y
+  //   br i1 %cond, label %TrueBB, label %FalseBB
+  // FalseBB:
+  //   store i32 1, ptr %q, align 4
+  //   ...
+  // TrueBB:
+  //   %maskedloadstore = load i32, ptr %b, align 4
+  //   store i32 %maskedloadstore, ptr %p, align 4
+  //   ...
+  // \endcode
+  //
+  // and transform it into:
+  //
+  // \code
+  // BB:
+  //   ...
+  //   %cond = icmp ult %x, %y
+  //   %maskedloadstore = cload i32, ptr %b, %cond
+  //   cstore i32 %maskedloadstore, ptr %p, %cond
+  //   cstore i32 1, ptr %q, ~%cond
+  //   br i1 %cond, label %TrueBB, label %FalseBB
+  // FalseBB:
+  //   ...
+  // TrueBB:
+  //   ...
+  // \endcode
+  //
+  // where cload/cstore are represented by llvm.masked.load/store intrinsics,
+  // e.g.
+  //
+  // \code
+  // %vcond = bitcast i1 %cond to <1 x i1>
+  // %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
+  //                      (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
+  // %maskedloadstore = bitcast <1 x i32> %v0 to i32
+  // call void @llvm.masked.store.v1i32.p0
+  //                      (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
+  // %cond.not = xor i1 %cond, true
+  // %vcond.not = bitcast i1 %cond.not to <1 x i1>
+  // call void @llvm.masked.store.v1i32.p0
+  //                      (<1 x i32> <i32 1>, ptr %q, i32 4, <1 x i1> %vcond.not)
+  // \endcode
+  //
+  // So we need to turn hoisted load/store into cload/cstore.
+  auto &Context = BI->getParent()->getContext();
+  auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
+  auto *Cond = BI->getOperand(0);
+  Value *Mask = nullptr;
+  // Construct the condition if needed.
+  if (!SpeculatedConditionalLoadsStores.empty()) {
+    IRBuilder<> Builder(SpeculatedConditionalLoadsStores.back());
+    Mask = Builder.CreateBitCast(
+        Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
+        VCondTy);
+  }
+  for (auto *I : SpeculatedConditionalLoadsStores) {
+    IRBuilder<> Builder(I);
+    // We currently assume conditional faulting load/store is supported for
+    // scalar types only when creating new instructions. This can be easily
+    // extended for vector types in the future.
+    assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
+    auto *Op0 = I->getOperand(0);
+    Instruction *MaskedLoadStore = nullptr;
+    if (auto *LI = dyn_cast<LoadInst>(I)) {
+      // Handle Load.
+      auto *Ty = I->getType();
+      MaskedLoadStore = Builder.CreateMaskedLoad(FixedVectorType::get(Ty, 1),
+                                                 Op0, LI->getAlign(), Mask);
+      I->replaceAllUsesWith(Builder.CreateBitCast(MaskedLoadStore, Ty));
+    } else {
+      // Handle Store.
+      auto *StoredVal =
+          Builder.CreateBitCast(Op0, FixedVectorType::get(Op0->getType(), 1));
+      MaskedLoadStore = Builder.CreateMaskedStore(
+          StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
+    }
+    // For non-debug metadata, only !annotation, !range, !nonnull and !align
+    // are kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
+    //
+    // !nonnull, !align: Only apply to pointer values; the hoisted value is
+    //                   not a pointer, so there is no need to keep them.
+    // !range: The load type changes from scalar to vector, but the metadata
+    //         on a vector specifies a per-element range, so the semantics
+    //         stay the same. Keep it.
+    // !annotation: Does not impact semantics. Keep it.
+    I->dropUBImplyingAttrsAndUnknownMetadata(
+        {LLVMContext::MD_range, LLVMContext::MD_annotation});
+    // FIXME: DIAssignID is not supported for masked store yet.
+    // (Verifier::visitDIAssignIDMetadata)
+    at::deleteAssignmentMarkers(I);
+    I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
+      return Node->getMetadataID() == Metadata::DIAssignIDKind;
+    });
+    MaskedLoadStore->copyMetadata(*I);
+    I->eraseFromParent();
+  }
+
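+  // For instance (a sketch based on the @nondebug_metadata test added in
+  // this patch), a hoisted load such as
+  //   %x = load i16, ptr %p, align 2, !range !0
+  // becomes a call to @llvm.masked.load.v1i16.p0 that still carries
+  // !range !0, while any DIAssignID attached to a hoisted store is dropped.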
   // Insert selects and rewrite the PHI operands.
   IRBuilder<NoFolder> Builder(BI);
   for (PHINode &PN : EndBB->phis()) {
diff --git a/llvm/test/Other/new-pm-print-pipeline.ll b/llvm/test/Other/new-pm-print-pipeline.ll
index f2e80814f347ade..12f88d60d66cec1 100644
--- a/llvm/test/Other/new-pm-print-pipeline.ll
+++ b/llvm/test/Other/new-pm-print-pipeline.ll
@@ -49,8 +49,8 @@
 ; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(print,print)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-17
 ; CHECK-17: function(print,print)
 
-; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(simplifycfg,simplifycfg)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-18
-; CHECK-18: function(simplifycfg,simplifycfg)
+; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(simplifycfg,simplifycfg)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-18
+; CHECK-18: function(simplifycfg,simplifycfg)
 
 ; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(loop-vectorize,loop-vectorize)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-19
 ; CHECK-19: function(loop-vectorize,loop-vectorize)
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/masked-memory-ops-with-cf.ll b/llvm/test/Transforms/PhaseOrdering/X86/masked-memory-ops-with-cf.ll
new file mode 100644
index 000000000000000..405a26de3d6afac
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/masked-memory-ops-with-cf.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -O1 -S | FileCheck %s
+
+;; Test that masked.load/store.v1* is generated in simplifycfg and does not
+;; fall back to branch+load/store in later passes.
+define void @basic(i1 %cond, ptr %b, ptr %p, ptr %q) {
+; CHECK-LABEL: @basic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
+; CHECK-NEXT:    [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
+; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[B:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> poison)
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to i64
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
+; CHECK-NEXT:    call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 2, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP8]], ptr [[P]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i64 [[TMP6]] to <1 x i64>
+; CHECK-NEXT:    call void @llvm.masked.store.v1i64.p0(<1 x i64> [[TMP9]], ptr [[Q]], i32 8, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %cond, label %if.true, label %if.false
+
+if.false:
+  br label %if.end
+
+if.true:
+  %pv = load i16, ptr %p, align 2
+  %qv = load i32, ptr %q, align 4
+  %bv = load i64, ptr %b, align 8
+  store i16 %pv, ptr %b, align 2
+  store i32 %qv, ptr %p, align 4
+  store i64 %bv, ptr %q, align 8
+  br label %if.false
+
+if.end:
+  ret void
+}
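For reference, a minimal sketch of how the knobs introduced above can be
exercised (the flag and parameter names come from this patch; the exact
pipeline placement may differ):

  ; Opt in via the SimplifyCFG pass parameter:
  ;   opt -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -S in.ll
  ; Or via the pass-level override plus the speculation cap:
  ;   opt -mtriple=x86_64 -mattr=+cf -passes=simplifycfg -hoist-loads-stores-with-cond-faulting -hoist-loads-stores-with-cond-faulting-threshold=4 -S in.ll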
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
new file mode 100644
index 000000000000000..047ca717da80094
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
@@ -0,0 +1,694 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg' -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s
+
+;; Basic case: check masked.load/store is generated for i16/i32/i64.
+define void @basic(i1 %cond, ptr %b, ptr %p, ptr %q) {
+; CHECK-LABEL: @basic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
+; CHECK-NEXT:    [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
+; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[B:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> poison)
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to i64
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
+; CHECK-NEXT:    call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 2, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP8]], ptr [[P]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i64 [[TMP6]] to <1 x i64>
+; CHECK-NEXT:    call void @llvm.masked.store.v1i64.p0(<1 x i64> [[TMP9]], ptr [[Q]], i32 8, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %cond, label %if.true, label %if.false
+
+if.false:
+  br label %if.end
+
+if.true:
+  %0 = load i16, ptr %p, align 2
+  %1 = load i32, ptr %q, align 4
+  %2 = load i64, ptr %b, align 8
+  store i16 %0, ptr %b, align 2
+  store i32 %1, ptr %p, align 4
+  store i64 %2, ptr %q, align 8
+  br label %if.false
+
+if.end:
+  ret void
+}
+
+;; Successor 1 branches to successor 0.
+define void @succ1to0(ptr %p, ptr %q, i32 %a) {
+; CHECK-LABEL: @succ1to0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[TOBOOL]], true
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32 [[TMP3]] to <1 x i32>
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP4]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tobool = icmp ne i32 %a, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.end:
+  ret void
+
+if.then:
+  %0 = load i32, ptr %q
+  store i32 %0, ptr %p
+  br label %if.end
+}
+
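+;; When the conditional block sits on the false edge (as in @succ1to0 above
+;; and @succ1to0_phi below), the mask is built from the inverted condition;
+;; a sketch of the materialization the checks verify:
+;;   %not = xor i1 %cond, true
+;;   %mask = bitcast i1 %not to <1 x i1>
+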
+;; Successor 1 branches to successor 0 and there is a phi node.
+define i32 @succ1to0_phi(ptr %p) {
+; CHECK-LABEL: @succ1to0_phi(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[COND]], true
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[COND]], i32 0, i32 [[TMP3]]
+; CHECK-NEXT:    ret i32 [[SPEC_SELECT]]
+;
+entry:
+  %cond = icmp eq ptr %p, null
+  br i1 %cond, label %if.true, label %if.false
+
+if.false:
+  %0 = load i32, ptr %p
+  br label %if.true
+
+if.true:
+  %res = phi i32 [ %0, %if.false ], [ 0, %entry ]
+  ret i32 %res
+}
+
+;; Successor 0 branches to successor 1.
+define void @succ0to1(i32 %a, ptr %b, ptr %p, ptr %q) {
+; CHECK-LABEL: @succ0to1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; CHECK-NEXT:    [[TMP1:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <1 x i32> [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <1 x i32>
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    store i32 1, ptr [[Q:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cond = icmp eq i32 %a, 0
+  br i1 %cond, label %if.true, label %if.false
+
+if.false:
+  store i32 1, ptr %q
+  br label %if.end
+
+if.true:
+  %0 = load i32, ptr %b
+  store i32 %0, ptr %p
+  br label %if.false
+
+if.end:
+  ret void
+}
+
+;; Load after store can be hoisted.
+define i64 @load_after_store(i32 %a, ptr %b, ptr %p) {
+; CHECK-LABEL: @load_after_store(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 1>, ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i16 [[TMP2]] to i64
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[COND]], i64 [[ZEXT]], i64 0
+; CHECK-NEXT:    ret i64 [[SPEC_SELECT]]
+;
+entry:
+  %cond = icmp eq i32 %a, 0
+  br i1 %cond, label %if.true, label %if.end
+
+if.true:
+  store i32 1, ptr %b
+  %0 = load i16, ptr %p
+  %zext = zext i16 %0 to i64
+  ret i64 %zext
+
+if.end:
+  ret i64 0
+}
+
+;; Speculatable memory read doesn't prevent the hoist.
+define void @load_skip_speculatable_memory_read(i32 %a, ptr %p, ptr %q) {
+; CHECK-LABEL: @load_skip_speculatable_memory_read(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[READ:%.*]] = call i32 @read_memory_only()
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32 [[READ]] to <1 x i32>
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP1]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    store i32 1, ptr [[Q:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cond = icmp eq i32 %a, 0
+  br i1 %cond, label %if.true, label %if.false
+
+if.false:
+  store i32 1, ptr %q
+  br label %if.end
+
+if.true:
+  %read = call i32 @read_memory_only()
+  store i32 %read, ptr %p
+  br label %if.false
+
+if.end:
+  ret void
+}
+
+;; Source of the load can be a GEP.
+define i32 @load_from_gep(ptr %p) {
+; CHECK-LABEL: @load_from_gep(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[COND]], true
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[ARRAYIDX]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[COND]], i32 0, i32 [[TMP3]]
+; CHECK-NEXT:    ret i32 [[SPEC_SELECT]]
+;
+entry:
+  %cond = icmp eq ptr %p, null
+  br i1 %cond, label %if.true, label %if.false
+
+if.false:
+  %arrayidx = getelementptr inbounds i8, ptr %p, i64 16
+  %0 = load i32, ptr %arrayidx
+  br label %if.true
+
+if.true:
+  %res = phi i32 [ %0, %if.false ], [ 0, %entry ]
+  ret i32 %res
+}
+
+;; Metadata range/annotation are kept.
+define void @nondebug_metadata(i1 %cond, ptr %p, ptr %q) {
+; CHECK-LABEL: @nondebug_metadata(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
+; CHECK-NEXT:    [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison), !range [[RNG5:![0-9]+]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
+; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison), !annotation [[META6:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
+; CHECK-NEXT:    call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP5]], ptr [[Q]], i32 4, <1 x i1> [[TMP0]]), !annotation [[META6]]
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP6]], ptr [[P]], i32 2, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %cond, label %if.true, label %if.false
+
+if.false:
+  ret void
+
+if.true:
+  %0 = load i16, ptr %p, align 2, !range !{i16 0, i16 10}
+  %1 = load i32, ptr %q, align 4, !annotation !11
+  store i16 %0, ptr %q, align 4, !annotation !11
+  store i32 %1, ptr %p, align 2
+  br label %if.false
+}
+
+define i16 @debug_metadata_diassign(i1 %cond, i16 %a, ptr %p) {
+; CHECK-LABEL: @debug_metadata_diassign(
+; CHECK-NEXT:  bb0:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
+; CHECK-NEXT:    call void @llvm.masked.store.v1i16.p0(<1 x i16> <i16 7>, ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[COND]], i16 3, i16 2
+; CHECK-NEXT:    ret i16 [[SPEC_SELECT]]
+;
+bb0:
+  br i1 %cond, label %if.true, label %if.false
+
+if.true:
+  store i16 7, ptr %p, align 4, !DIAssignID !9
+  br label %if.false
+
+if.false:
+  %ret = phi i16 [ 2, %bb0 ], [ 3, %if.true ]
+  call void @llvm.dbg.assign(metadata i16 %ret, metadata !8, metadata !DIExpression(), metadata !9, metadata ptr %p, metadata !DIExpression()), !dbg !7
+  ret i16 %ret
+}
+
+;; Do not crash when combined with the store speculation controlled by
+;; simplifycfg-hoist-cond-stores.
+define i32 @hoist_cond_stores(i1 %cond, ptr %p) {
+; CHECK-LABEL: @hoist_cond_stores(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i1 false, ptr [[P:%.*]], align 2
+; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = select i1 [[COND:%.*]], i1 false, i1 false
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr [[P]], i32 8, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    store i1 [[SPEC_STORE_SELECT]], ptr [[P]], align 2
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  store i1 false, ptr %p, align 2
+  br i1 %cond, label %if.true, label %if.false
+
+if.true:                                          ; preds = %entry
+  store i32 0, ptr %p, align 8
+  store i1 false, ptr %p, align 2
+  br label %if.false
+
+if.false:                                         ; preds = %if.true, %entry
+  ret i32 0
+}
+
+;; Both of successor 0 and successor 1 have a single predecessor.
+;; TODO: Support transform for this case.
+define void @single_predecessor(ptr %p, ptr %q, i32 %a) {
+; CHECK-LABEL: @single_predecessor(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       common.ret:
+; CHECK-NEXT:    ret void
+; CHECK:       if.end:
+; CHECK-NEXT:    store i32 1, ptr [[Q:%.*]], align 4
+; CHECK-NEXT:    br label [[COMMON_RET:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[Q]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[P:%.*]], align 4
+; CHECK-NEXT:    br label [[COMMON_RET]]
+;
+entry:
+  %tobool = icmp ne i32 %a, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.end:
+  store i32 1, ptr %q
+  ret void
+
+if.then:
+  %0 = load i32, ptr %q
+  store i32 %0, ptr %p
+  ret void
+}
+
+;; Hoist 6 stores.
+define void @threshold_6(i1 %cond, ptr %p1, ptr %p2, ptr %p3, ptr %p4, ptr %p5, ptr %p6) {
+; CHECK-LABEL: @threshold_6(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 1>, ptr [[P1:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 2>, ptr [[P2:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 3>, ptr [[P3:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 4>, ptr [[P4:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 5>, ptr [[P5:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 6>, ptr [[P6:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %cond, label %if.true, label %if.false
+
+if.true:
+  store i32 1, ptr %p1, align 4
+  store i32 2, ptr %p2, align 4
+  store i32 3, ptr %p3, align 4
+  store i32 4, ptr %p4, align 4
+  store i32 5, ptr %p5, align 4
+  store i32 6, ptr %p6, align 4
+  br label %if.false
+
+if.false:
+  ret void
+}
+
+;; Do not hoist 7 stores.
+define void @threshold_7(i1 %cond, ptr %p1, ptr %p2, ptr %p3, ptr %p4, ptr %p5, ptr %p6, ptr %p7) {
+; CHECK-LABEL: @threshold_7(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK:       if.true:
+; CHECK-NEXT:    store i32 1, ptr [[P1:%.*]], align 4
+; CHECK-NEXT:    store i32 2, ptr [[P2:%.*]], align 4
+; CHECK-NEXT:    store i32 3, ptr [[P3:%.*]], align 4
+; CHECK-NEXT:    store i32 4, ptr [[P4:%.*]], align 4
+; CHECK-NEXT:    store i32 5, ptr [[P5:%.*]], align 4
+; CHECK-NEXT:    store i32 6, ptr [[P6:%.*]], align 4
+; CHECK-NEXT:    store i32 7, ptr [[P7:%.*]], align 4
+; CHECK-NEXT:    br label [[IF_FALSE]]
+; CHECK:       if.false:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %cond, label %if.true, label %if.false
+
+if.true:
+  store i32 1, ptr %p1, align 4
+  store i32 2, ptr %p2, align 4
+  store i32 3, ptr %p3, align 4
+  store i32 4, ptr %p4, align 4
+  store i32 5, ptr %p5, align 4
+  store i32 6, ptr %p6, align 4
+  store i32 7, ptr %p7, align 4
+  br label %if.false
+
+if.false:
+  ret void
+}
+
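+;; Note on the cost model exercised by the surrounding tests: safe conditional
+;; loads/stores are exempt from the single-instruction speculation budget and
+;; are instead capped by -hoist-loads-stores-with-cond-faulting-threshold
+;; (6 by default), while every other speculated instruction still pays the
+;; normal computeSpeculationCost/PHI-folding budget.
+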
+;; Do not hoist if the instructions to be hoisted are expensive.
+define i32 @not_cheap_to_hoist(i32 %a, ptr %b, ptr %p, ptr %q, i32 %v0, i32 %v1, i32 %v2, i1 %cc) {
+; CHECK-LABEL: @not_cheap_to_hoist(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT:    br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK:       common.ret:
+; CHECK-NEXT:    [[COMMON_RET_OP:%.*]] = phi i32 [ [[VVVV:%.*]], [[IF_FALSE]] ], [ 0, [[IF_TRUE]] ]
+; CHECK-NEXT:    ret i32 [[COMMON_RET_OP]]
+; CHECK:       if.false:
+; CHECK-NEXT:    store i64 1, ptr [[P:%.*]], align 8
+; CHECK-NEXT:    store i16 2, ptr [[Q:%.*]], align 2
+; CHECK-NEXT:    [[V:%.*]] = udiv i32 [[A]], 12345
+; CHECK-NEXT:    [[VV:%.*]] = mul i32 [[V]], [[V0:%.*]]
+; CHECK-NEXT:    [[VVV:%.*]] = mul i32 [[VV]], [[V1:%.*]]
+; CHECK-NEXT:    [[VVVV]] = select i1 [[CC:%.*]], i32 [[V2:%.*]], i32 [[VVV]]
+; CHECK-NEXT:    br label [[COMMON_RET:%.*]]
+; CHECK:       if.true:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[P]], align 4
+; CHECK-NEXT:    br label [[COMMON_RET]]
+;
+entry:
+  %cond = icmp eq i32 %a, 0
+  br i1 %cond, label %if.true, label %if.false
+
+if.false:
+  store i64 1, ptr %p
+  store i16 2, ptr %q
+
+  %v = udiv i32 %a, 12345
+  %vv = mul i32 %v, %v0
+  %vvv = mul i32 %vv, %v1
+  %vvvv = select i1 %cc, i32 %v2, i32 %vvv
+  ret i32 %vvvv
+
+if.true:
+  %0 = load i32, ptr %b
+  store i32 %0, ptr %p
+  br label %if.end
+
+if.end:
+  ret i32 0
+}
+
+;; Do not hoist if there is more than one predecessor.
+define void @not_single_predecessor(ptr %p, ptr %q, i32 %a) {
+; CHECK-LABEL: @not_single_predecessor(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    br label [[IF_THEN]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[Q:%.*]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[P:%.*]], align 4
+; CHECK-NEXT:    br label [[IF_END]]
+;
+entry:
+  %tobool = icmp ne i32 %a, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.end:
+  br label %if.then
+
+if.then:
+  %0 = load i32, ptr %q
+  store i32 %0, ptr %p
+  br label %if.end
+}
+
+;; Do not hoist because i8 is not supported by conditional faulting.
+define void @not_supported_type(i8 %a, ptr %b, ptr %p, ptr %q) {
+; CHECK-LABEL: @not_supported_type(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i8 [[A:%.*]], 0
+; CHECK-NEXT:    br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK:       if.false:
+; CHECK-NEXT:    store i8 1, ptr [[Q:%.*]], align 1
+; CHECK-NEXT:    br label [[IF_END:%.*]]
+; CHECK:       if.true:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[B:%.*]], align 1
+; CHECK-NEXT:    store i8 [[TMP0]], ptr [[P:%.*]], align 1
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cond = icmp eq i8 %a, 0
+  br i1 %cond, label %if.true, label %if.false
+
+if.false:
+  store i8 1, ptr %q
+  br label %if.end
+
+if.true:
+  %0 = load i8, ptr %b
+  store i8 %0, ptr %p
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+;; Do not hoist if the terminator is not a br.
+define void @not_br_terminator(i32 %a, ptr %b, ptr %p, ptr %q) {
+; CHECK-LABEL: @not_br_terminator(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 [[A:%.*]], label [[IF_END:%.*]] [
+; CHECK-NEXT:      i32 1, label [[IF_FALSE:%.*]]
+; CHECK-NEXT:      i32 2, label [[IF_TRUE:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       if.false:
+; CHECK-NEXT:    store i32 1, ptr [[Q:%.*]], align 4
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.true:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[P:%.*]], align 4
+; CHECK-NEXT:    br label [[IF_FALSE]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  switch i32 %a, label %if.end [
+    i32 1, label %if.false
+    i32 2, label %if.true
+  ]
+
+if.false:
+  store i32 1, ptr %q, align 4
+  br label %if.end
+
+if.true:
+  %0 = load i32, ptr %b, align 4
+  store i32 %0, ptr %p, align 4
+  br label %if.false
+
+if.end:
+  ret void
+}
+
+;; Do not hoist if the instruction to be hoisted is atomic.
+define void @not_atomic(i1 %cond, ptr %p) {
+; CHECK-LABEL: @not_atomic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK:       if.false:
+; CHECK-NEXT:    store atomic i32 1, ptr [[P:%.*]] seq_cst, align 4
+; CHECK-NEXT:    br label [[IF_TRUE]]
+; CHECK:       if.true:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %cond, label %if.true, label %if.false
+
+if.false:
+  store atomic i32 1, ptr %p seq_cst, align 4
+  br label %if.true
+
+if.true:
+  ret void
+}
+
+;; Do not hoist if the instruction to be hoisted is volatile.
+define void @not_volatile(i1 %cond, ptr %p) {
+; CHECK-LABEL: @not_volatile(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK:       if.false:
+; CHECK-NEXT:    [[TMP0:%.*]] = load volatile i32, ptr [[P:%.*]], align 4
+; CHECK-NEXT:    br label [[IF_TRUE]]
+; CHECK:       if.true:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %cond, label %if.true, label %if.false
+
+if.false:
+  %0 = load volatile i32, ptr %p, align 4
+  br label %if.true
+
+if.true:
+  ret void
+}
+
+;; Do not hoist if another instruction in the same BB has side effects.
+define void @not_hoistable_sideeffect(i1 %cond, ptr %p, ptr %q) {
+; CHECK-LABEL: @not_hoistable_sideeffect(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK:       if.false:
+; CHECK-NEXT:    [[RMW:%.*]] = atomicrmw xchg ptr [[Q:%.*]], double 4.000000e+00 seq_cst, align 8
+; CHECK-NEXT:    store i32 1, ptr [[P:%.*]], align 4
+; CHECK-NEXT:    br label [[IF_TRUE]]
+; CHECK:       if.true:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %cond, label %if.true, label %if.false
+
+if.false:
+  %rmw = atomicrmw xchg ptr %q, double 4.0 seq_cst
+  store i32 1, ptr %p, align 4
+  br label %if.true
+
+if.true:
+  ret void
+}
+
+;; Do not hoist if the branch is predictable and the `then` BB is not likely to execute.
+define void @not_likely_to_execute(ptr %p, ptr %q, i32 %a) {
+; CHECK-LABEL: @not_likely_to_execute(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]], !prof [[PROF7:![0-9]+]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[Q:%.*]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[P:%.*]], align 4
+; CHECK-NEXT:    br label [[IF_END]]
+;
+entry:
+  %tobool = icmp ne i32 %a, 0
+  br i1 %tobool, label %if.then, label %if.end, !prof !10
+
+if.end:
+  ret void
+
+if.then:
+  %0 = load i32, ptr %q
+  store i32 %0, ptr %p
+  br label %if.end
+}
+
+;; The hoist-loads-stores-with-cond-faulting optimization runs after SROA, by
+;; which point allocas like these have been optimized away, so the transform
+;; does not need to handle this case. It may be moved before SROA in the
+;; future.
+define void @not_alloca(ptr %p, ptr %q, i32 %a) {
+; CHECK-LABEL: @not_alloca(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[Q_ADDR:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store ptr [[P:%.*]], ptr [[P_ADDR]], align 8
+; CHECK-NEXT:    store ptr [[Q:%.*]], ptr [[Q_ADDR]], align 8
+; CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[Q_ADDR]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+; CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p.addr = alloca ptr
+  %q.addr = alloca ptr
+  %a.addr = alloca i32
+  store ptr %p, ptr %p.addr
+  store ptr %q, ptr %q.addr
+  store i32 %a, ptr %a.addr
+  %0 = load i32, ptr %a.addr
+  %tobool = icmp ne i32 %0, 0
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then:
+  %1 = load ptr, ptr %q.addr
+  %2 = load i32, ptr %1
+  %3 = load ptr, ptr %p.addr
+  store i32 %2, ptr %3
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+;; Do not transform if the alignment is 2^32.
+define void @not_maximum_alignment(i1 %cond, ptr %p) {
+; CHECK-LABEL: @not_maximum_alignment(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK:       if.true:
+; CHECK-NEXT:    store i32 0, ptr [[P:%.*]], align 4294967296
+; CHECK-NEXT:    br label [[IF_FALSE]]
+; CHECK:       if.false:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br i1 %cond, label %if.true, label %if.false
+
+if.true:
+  store i32 0, ptr %p, align 4294967296
+  br label %if.false
+
+if.false:
+  ret void
+}
+
+declare i32 @read_memory_only() readonly nounwind willreturn speculatable
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+!llvm.ident = !{!4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "clang")
+!1 = !DIFile(filename: "foo.c", directory: "/tmp")
+!2 = !{i32 2, !"Dwarf Version", i32 4}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{!"clang"}
+!5 = !DIBasicType(name: "int", size: 16, encoding: DW_ATE_signed)
+!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 4, unit: !0)
+!7 = !DILocation(line: 5, column: 7, scope: !6)
+!8 = !DILocalVariable(name: "a", scope: !6, line: 6, type: !5)
+!9 = distinct !DIAssignID()
+!10 = !{!"branch_weights", i32 1, i32 99}
+!11 = !{ !"auto-init" }