diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 6628f3d53f56a14..a9ee9f62197e17a 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8608,7 +8608,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { // loop region contains a header and latch basic blocks. VPlanPtr Plan = VPlan::createInitialVPlan( createTripCountSCEV(Legal->getWidestInductionType(), PSE, OrigLoop), - *PSE.getSE()); + *PSE.getSE(), OrigLoop->getLoopPreheader()); VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body"); VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch"); VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB); @@ -8856,7 +8856,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { // Create new empty VPlan auto Plan = VPlan::createInitialVPlan( createTripCountSCEV(Legal->getWidestInductionType(), PSE, OrigLoop), - *PSE.getSE()); + *PSE.getSE(), OrigLoop->getLoopPreheader()); // Build hierarchical CFG VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index bf467eb8434f887..f17be451e6846ab 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -442,6 +442,16 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) { return NewBB; } +void VPIRBasicBlock::execute(VPTransformState *State) { + assert(getHierarchicalPredecessors().empty() && + "VPIRBasicBlock cannot have predecessors at the moment"); + assert(getHierarchicalSuccessors().empty() && + "VPIRBasicBlock cannot have successors at the moment"); + + State->Builder.SetInsertPoint(getIRBasicBlock()->getTerminator()); + executeRecipes(State, getIRBasicBlock()); +} + void VPBasicBlock::execute(VPTransformState *State) { bool Replica = State->Instance && !State->Instance->isFirstIteration(); VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB; @@ -499,16 +509,7 @@ void VPBasicBlock::execute(VPTransformState *State) { } // 2. Fill the IR basic block with IR instructions. - LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName() - << " in BB:" << NewBB->getName() << '\n'); - - State->CFG.VPBB2IRBB[this] = NewBB; - State->CFG.PrevVPBB = this; - - for (VPRecipeBase &Recipe : Recipes) - Recipe.execute(*State); - - LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB); + executeRecipes(State, NewBB); } void VPBasicBlock::dropAllReferences(VPValue *NewValue) { @@ -521,6 +522,19 @@ void VPBasicBlock::dropAllReferences(VPValue *NewValue) { } } +void VPBasicBlock::executeRecipes(VPTransformState *State, BasicBlock *BB) { + LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName() + << " in BB:" << BB->getName() << '\n'); + + State->CFG.VPBB2IRBB[this] = BB; + State->CFG.PrevVPBB = this; + + for (VPRecipeBase &Recipe : Recipes) + Recipe.execute(*State); + + LLVM_DEBUG(dbgs() << "LV: filled BB:" << *BB); +} + VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) { assert((SplitAt == end() || SplitAt->getParent() == this) && "can only split at a position in the same block"); @@ -769,8 +783,9 @@ VPlan::~VPlan() { delete BackedgeTakenCount; } -VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE) { - VPBasicBlock *Preheader = new VPBasicBlock("ph"); +VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE, + BasicBlock *PH) { + VPIRBasicBlock *Preheader = new VPIRBasicBlock(PH); VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph"); auto Plan = std::make_unique(Preheader, VecPreheader); Plan->TripCount = diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index e75a1de548f7d45..8734389ceb0582e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -473,7 +473,7 @@ class VPBlockBase { /// that are actually instantiated. Values of this enumeration are kept in the /// SubclassID field of the VPBlockBase objects. They are used for concrete /// type identification. - using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC }; + using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC }; using VPBlocksTy = SmallVectorImpl; @@ -2833,10 +2833,13 @@ class VPBasicBlock : public VPBlockBase { public: using RecipeListTy = iplist; -private: +protected: /// The VPRecipes held in the order of output instructions to generate. RecipeListTy Recipes; + VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "") + : VPBlockBase(BlockSC, Name.str()) {} + public: VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr) : VPBlockBase(VPBasicBlockSC, Name.str()) { @@ -2885,7 +2888,8 @@ class VPBasicBlock : public VPBlockBase { /// Method to support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const VPBlockBase *V) { - return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC; + return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC || + V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC; } void insert(VPRecipeBase *Recipe, iterator InsertPt) { @@ -2948,12 +2952,48 @@ class VPBasicBlock : public VPBlockBase { return NewBlock; } +protected: + /// Execute the recipes in the IR basic block \p BB. + void executeRecipes(VPTransformState *State, BasicBlock *BB); + private: /// Create an IR BasicBlock to hold the output instructions generated by this /// VPBasicBlock, and return it. Update the CFGState accordingly. BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG); }; +/// A special type of VPBasicBlock that wraps an existing IR basic block. +/// Recipes of the block get added before the first non-phi instruction in the +/// wrapped block. +/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's +/// preheader block. +class VPIRBasicBlock : public VPBasicBlock { + BasicBlock *IRBB; + +public: + VPIRBasicBlock(BasicBlock *IRBB) + : VPBasicBlock(VPIRBasicBlockSC, "ph"), IRBB(IRBB) {} + + ~VPIRBasicBlock() override {} + + static inline bool classof(const VPBlockBase *V) { + return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC; + } + + /// The method which generates the output IR instructions that correspond to + /// this VPBasicBlock, thereby "executing" the VPlan. + void execute(VPTransformState *State) override; + + VPIRBasicBlock *clone() override { + auto *NewBlock = new VPIRBasicBlock(IRBB); + for (VPRecipeBase &R : Recipes) + NewBlock->appendRecipe(R.clone()); + return NewBlock; + } + + BasicBlock *getIRBasicBlock() const { return IRBB; } +}; + /// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks /// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG. /// A VPRegionBlock may indicate that its contents are to be replicated several @@ -3142,12 +3182,12 @@ class VPlan { ~VPlan(); /// Create initial VPlan skeleton, having an "entry" VPBasicBlock (wrapping - /// original scalar pre-header) which contains SCEV expansions that need to - /// happen before the CFG is modified; a VPBasicBlock for the vector + /// original scalar pre-header \p PH) which contains SCEV expansions that need + /// to happen before the CFG is modified; a VPBasicBlock for the vector /// pre-header, followed by a region for the vector loop, followed by the /// middle VPBasicBlock. static VPlanPtr createInitialVPlan(const SCEV *TripCount, - ScalarEvolution &PSE); + ScalarEvolution &PSE, BasicBlock *PH); /// Prepare the plan for execution, setting up the required live-in values. void prepareToExecute(Value *TripCount, Value *VectorTripCount, diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 2fe487f972bb9d6..56c82b13ccdef3b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -17,6 +17,7 @@ #include "VPlanCFG.h" #include "VPlanDominatorTree.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/CommandLine.h" #define DEBUG_TYPE "loop-vectorize" @@ -27,6 +28,8 @@ namespace { class VPlanVerifier { const VPDominatorTree &VPDT; + SmallPtrSet WrappedIRBBs; + // Verify that phi-like recipes are at the beginning of \p VPBB, with no // other recipes in between. Also check that only header blocks contain // VPHeaderPHIRecipes. @@ -148,6 +151,19 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) { } } } + + auto *IRBB = dyn_cast(VPBB); + if (!IRBB) + return true; + + if (!WrappedIRBBs.insert(IRBB->getIRBasicBlock()).second) { + errs() << "Same IR basic block used by multiple wrapper blocks!\n"; + return false; + } + if (IRBB != IRBB->getPlan()->getPreheader()) { + errs() << "VPIRBasicBlock can only be used as pre-header at the moment!\n"; + return false; + } return true; } diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h index 6cd43f6803130aa..c658724278fe0ce 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h +++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h @@ -67,9 +67,10 @@ class VPlanTestBase : public testing::Test { assert(!verifyFunction(F) && "input function must be valid"); doAnalysis(F); - auto Plan = VPlan::createInitialVPlan( - SE->getBackedgeTakenCount(LI->getLoopFor(LoopHeader)), *SE); - VPlanHCFGBuilder HCFGBuilder(LI->getLoopFor(LoopHeader), LI.get(), *Plan); + Loop *L = LI->getLoopFor(LoopHeader); + auto Plan = VPlan::createInitialVPlan(SE->getBackedgeTakenCount(L), *SE, + L->getLoopPreheader()); + VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan); HCFGBuilder.buildHierarchicalCFG(); return Plan; } @@ -80,9 +81,10 @@ class VPlanTestBase : public testing::Test { assert(!verifyFunction(F) && "input function must be valid"); doAnalysis(F); - auto Plan = VPlan::createInitialVPlan( - SE->getBackedgeTakenCount(LI->getLoopFor(LoopHeader)), *SE); - VPlanHCFGBuilder HCFGBuilder(LI->getLoopFor(LoopHeader), LI.get(), *Plan); + Loop *L = LI->getLoopFor(LoopHeader); + auto Plan = VPlan::createInitialVPlan(SE->getBackedgeTakenCount(L), *SE, + L->getLoopPreheader()); + VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan); HCFGBuilder.buildPlainCFG(); return Plan; }