diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 75ff0fe259a3b08..b33fb6cce701a3f 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2466,6 +2466,25 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) { return VectorTripCount; } +/// Introduces a new VPIRBasicBlock for \p CheckIRBB to \p Plan between the +/// vector preheader and its predecessor, also connecting the new block to the +/// scalar preheader. +static void introduceCheckBlockInVPlan(VPlan &Plan, BasicBlock *CheckIRBB) { + VPBlockBase *ScalarPH = Plan.getScalarPreheader(); + VPBlockBase *VectorPH = Plan.getVectorPreheader(); + VPBlockBase *PreVectorPH = VectorPH->getSinglePredecessor(); + if (PreVectorPH->getNumSuccessors() != 1) { + assert(PreVectorPH->getNumSuccessors() == 2 && "Expected 2 successors"); + assert(PreVectorPH->getSuccessors()[0] == ScalarPH && + "Unexpected successor"); + VPIRBasicBlock *CheckVPIRBB = VPIRBasicBlock::fromBasicBlock(CheckIRBB); + VPBlockUtils::insertOnEdge(PreVectorPH, VectorPH, CheckVPIRBB); + PreVectorPH = CheckVPIRBB; + } + VPBlockUtils::connectBlocks(PreVectorPH, ScalarPH); + PreVectorPH->swapSuccessors(); +} + void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) { Value *Count = getTripCount(); // Reuse existing vector loop preheader for TC checks. @@ -2540,14 +2559,15 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) { DT->getNode(Bypass)->getIDom()) && "TC check is expected to dominate Bypass"); - // Update dominator for Bypass & LoopExit (if needed). - DT->changeImmediateDominator(Bypass, TCCheckBlock); BranchInst &BI = *BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters); if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false); ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI); LoopBypassBlocks.push_back(TCCheckBlock); + + // TODO: Wrap LoopVectorPreHeader in VPIRBasicBlock here. + introduceCheckBlockInVPlan(Plan, TCCheckBlock); } BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) { @@ -2564,6 +2584,8 @@ BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) { "Should already be a bypass block due to iteration count check"); LoopBypassBlocks.push_back(SCEVCheckBlock); AddedSafetyChecks = true; + + introduceCheckBlockInVPlan(Plan, SCEVCheckBlock); return SCEVCheckBlock; } @@ -2600,6 +2622,7 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) { AddedSafetyChecks = true; + introduceCheckBlockInVPlan(Plan, MemCheckBlock); return MemCheckBlock; } @@ -7980,8 +8003,6 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass, DT->getNode(Bypass)->getIDom()) && "TC check is expected to dominate Bypass"); - // Update dominator for Bypass. - DT->changeImmediateDominator(Bypass, TCCheckBlock); LoopBypassBlocks.push_back(TCCheckBlock); // Save the trip count so we don't have to regenerate it in the @@ -7996,6 +8017,7 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass, setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false); ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI); + introduceCheckBlockInVPlan(Plan, TCCheckBlock); return TCCheckBlock; } @@ -8027,9 +8049,6 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton( EPI.MainLoopIterationCountCheck->getTerminator()->replaceUsesOfWith( VecEpilogueIterationCountCheck, LoopVectorPreHeader); - DT->changeImmediateDominator(LoopVectorPreHeader, - EPI.MainLoopIterationCountCheck); - EPI.EpilogueIterationCountCheck->getTerminator()->replaceUsesOfWith( VecEpilogueIterationCountCheck, LoopScalarPreHeader); @@ -8040,19 +8059,8 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton( EPI.MemSafetyCheck->getTerminator()->replaceUsesOfWith( VecEpilogueIterationCountCheck, LoopScalarPreHeader); - DT->changeImmediateDominator( - VecEpilogueIterationCountCheck, - VecEpilogueIterationCountCheck->getSinglePredecessor()); - DT->changeImmediateDominator(LoopScalarPreHeader, EPI.EpilogueIterationCountCheck); - if (!Cost->requiresScalarEpilogue(EPI.EpilogueVF.isVector())) - // If there is an epilogue which must run, there's no edge from the - // middle block to exit blocks and thus no need to update the immediate - // dominator of the exit blocks. - DT->changeImmediateDominator(OrigLoop->getUniqueLatchExitBlock(), - EPI.EpilogueIterationCountCheck); - // Keep track of bypass blocks, as they feed start values to the induction and // reduction phis in the scalar loop preheader. if (EPI.SCEVSafetyCheck) @@ -8143,6 +8151,16 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck( } ReplaceInstWithInst(Insert->getTerminator(), &BI); LoopBypassBlocks.push_back(Insert); + + // A new entry block has been created for the epilogue VPlan. Hook it in, as + // otherwise we would try to modify the entry to the main vector loop. + VPIRBasicBlock *NewEntry = VPIRBasicBlock::fromBasicBlock(Insert); + VPBasicBlock *OldEntry = Plan.getEntry(); + VPBlockUtils::reassociateBlocks(OldEntry, NewEntry); + Plan.setEntry(NewEntry); + delete OldEntry; + + introduceCheckBlockInVPlan(Plan, Insert); return Insert; } @@ -10495,8 +10513,6 @@ bool LoopVectorizePass::processLoop(Loop *L) { EpilogILV.setTripCount(MainILV.getTripCount()); preparePlanForEpilogueVectorLoop(BestEpiPlan, L, ExpandedSCEVs, EPI); - assert(DT->verify(DominatorTree::VerificationLevel::Fast) && - "DT not preserved correctly"); LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV, DT, true, &ExpandedSCEVs); ++LoopsEpilogueVectorized; @@ -10524,6 +10540,9 @@ bool LoopVectorizePass::processLoop(Loop *L) { checkMixedPrecision(L, ORE); } + assert(DT->verify(DominatorTree::VerificationLevel::Fast) && + "DT not preserved correctly"); + std::optional RemainderLoopID = makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll, LLVMLoopVectorizeFollowupEpilogue}); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 1d58c3551a4e1c5..0ede3012e165931 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -170,9 +170,7 @@ VPBasicBlock *VPBlockBase::getEntryBasicBlock() { } void VPBlockBase::setPlan(VPlan *ParentPlan) { - assert( - (ParentPlan->getEntry() == this || ParentPlan->getPreheader() == this) && - "Can only set plan on its entry or preheader block."); + assert(ParentPlan->getEntry() == this && "Can only set plan on its entry."); Plan = ParentPlan; } @@ -823,6 +821,18 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent, } #endif +VPlan::VPlan(VPBasicBlock *OriginalPreheader, VPValue *TC, + VPBasicBlock *EntryVectorPreHeader, VPIRBasicBlock *ScalarHeader) + : VPlan(OriginalPreheader, TC, ScalarHeader) { + VPBlockUtils::connectBlocks(OriginalPreheader, EntryVectorPreHeader); +} + +VPlan::VPlan(VPBasicBlock *OriginalPreheader, + VPBasicBlock *EntryVectorPreHeader, VPIRBasicBlock *ScalarHeader) + : VPlan(OriginalPreheader, ScalarHeader) { + VPBlockUtils::connectBlocks(OriginalPreheader, EntryVectorPreHeader); +} + VPlan::~VPlan() { if (Entry) { VPValue DummyValue; @@ -830,9 +840,6 @@ VPlan::~VPlan() { Block->dropAllReferences(&DummyValue); VPBlockBase::deleteCFG(Entry); - - Preheader->dropAllReferences(&DummyValue); - delete Preheader; } for (VPValue *VPV : VPLiveInsToFree) delete VPV; @@ -855,9 +862,16 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, VPIRBasicBlock *Entry = VPIRBasicBlock::fromBasicBlock(TheLoop->getLoopPreheader()); VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph"); + // Connect entry only to vector preheader initially. Entry will also be + // connected to the scalar preheader later, during skeleton creation when + // runtime guards are added as needed. Note that when executing the VPlan for + // an epilogue vector loop, the original entry block here will be replaced by + // a new VPIRBasicBlock wrapping the entry to the epilogue vector loop after + // generating code for the main vector loop. + VPBlockUtils::connectBlocks(Entry, VecPreheader); VPIRBasicBlock *ScalarHeader = VPIRBasicBlock::fromBasicBlock(TheLoop->getHeader()); - auto Plan = std::make_unique(Entry, VecPreheader, ScalarHeader); + auto Plan = std::make_unique(Entry, ScalarHeader); // Create SCEV and VPValue for the trip count. // We use the symbolic max backedge-taken-count, which works also when @@ -981,15 +995,21 @@ void VPlan::execute(VPTransformState *State) { State->CFG.DTU.applyUpdates( {{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}}); - // Replace regular VPBB's for the middle and scalar preheader blocks with - // VPIRBasicBlocks wrapping their IR blocks. The IR blocks are created during - // skeleton creation, so we can only create the VPIRBasicBlocks now during - // VPlan execution rather than earlier during VPlan construction. + // Replace regular VPBB's for the vector preheader, middle and scalar + // preheader blocks with VPIRBasicBlocks wrapping their IR blocks. The IR + // blocks are created during skeleton creation, so we can only create the + // VPIRBasicBlocks now during VPlan execution rather than earlier during VPlan + // construction. BasicBlock *MiddleBB = State->CFG.ExitBB; - VPBasicBlock *MiddleVPBB = getMiddleBlock(); BasicBlock *ScalarPh = MiddleBB->getSingleSuccessor(); + replaceVPBBWithIRVPBB(getVectorPreheader(), VectorPreHeader); + replaceVPBBWithIRVPBB(getMiddleBlock(), MiddleBB); replaceVPBBWithIRVPBB(getScalarPreheader(), ScalarPh); - replaceVPBBWithIRVPBB(MiddleVPBB, MiddleBB); + + LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << State->VF + << ", UF=" << getUF() << '\n'); + setName("Final VPlan"); + LLVM_DEBUG(dump()); LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << State->VF << ", UF=" << getUF() << '\n'); @@ -1062,9 +1082,6 @@ void VPlan::execute(VPTransformState *State) { } State->CFG.DTU.flush(); - assert(State->CFG.DTU.getDomTree().verify( - DominatorTree::VerificationLevel::Fast) && - "DT not preserved correctly"); } InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) { @@ -1117,11 +1134,6 @@ void VPlan::print(raw_ostream &O) const { printLiveIns(O); - if (!getPreheader()->empty()) { - O << "\n"; - getPreheader()->print(O, "", SlotTracker); - } - ReversePostOrderTraversal> RPOT(getEntry()); for (const VPBlockBase *Block : RPOT) { @@ -1155,6 +1167,21 @@ std::string VPlan::getName() const { return Out; } +VPRegionBlock *VPlan::getVectorLoopRegion() { + // TODO: Cache if possible. + for (VPBlockBase *B : vp_depth_first_shallow(getEntry())) + if (auto *R = dyn_cast(B)) + return R; + return nullptr; +} + +const VPRegionBlock *VPlan::getVectorLoopRegion() const { + for (const VPBlockBase *B : vp_depth_first_shallow(getEntry())) + if (auto *R = dyn_cast(B)) + return R; + return nullptr; +} + LLVM_DUMP_METHOD void VPlan::printDOT(raw_ostream &O) const { VPlanPrinter Printer(O, *this); @@ -1205,7 +1232,6 @@ static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, VPlan *VPlan::duplicate() { // Clone blocks. - VPBasicBlock *NewPreheader = Preheader->clone(); const auto &[NewEntry, __] = cloneFrom(Entry); BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock(); @@ -1215,8 +1241,7 @@ VPlan *VPlan::duplicate() { return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB; })); // Create VPlan, clone live-ins and remap operands in the cloned blocks. - auto *NewPlan = - new VPlan(NewPreheader, cast(NewEntry), NewScalarHeader); + auto *NewPlan = new VPlan(cast(NewEntry), NewScalarHeader); DenseMap Old2NewVPValues; for (VPValue *OldLiveIn : VPLiveInsToFree) { Old2NewVPValues[OldLiveIn] = @@ -1236,7 +1261,6 @@ VPlan *VPlan::duplicate() { // else NewTripCount will be created and inserted into Old2NewVPValues when // TripCount is cloned. In any case NewPlan->TripCount is updated below. - remapOperands(Preheader, NewPreheader, Old2NewVPValues); remapOperands(Entry, NewEntry, Old2NewVPValues); // Initialize remaining fields of cloned VPlan. @@ -1288,8 +1312,6 @@ void VPlanPrinter::dump() { OS << "edge [fontname=Courier, fontsize=30]\n"; OS << "compound=true\n"; - dumpBlock(Plan.getPreheader()); - for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry())) dumpBlock(Block); @@ -1550,7 +1572,6 @@ void VPSlotTracker::assignNames(const VPlan &Plan) { assignName(Plan.BackedgeTakenCount); for (VPValue *LI : Plan.VPLiveInsToFree) assignName(LI); - assignNames(Plan.getPreheader()); ReversePostOrderTraversal> RPOT(VPBlockDeepTraversalWrapper(Plan.getEntry())); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 7440a3a386fd2db..ae68e1fc63a1391 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -3768,14 +3768,12 @@ class VPlan { friend class VPlanPrinter; friend class VPSlotTracker; - /// Hold the single entry to the Hierarchical CFG of the VPlan, i.e. the - /// preheader of the vector loop. - VPBasicBlock *Entry; - /// VPBasicBlock corresponding to the original preheader. Used to place /// VPExpandSCEV recipes for expressions used during skeleton creation and the /// rest of VPlan execution. - VPBasicBlock *Preheader; + /// When this VPlan is used for the epilogue vector loop, the entry will be + /// replaced by a new entry block created during skeleton creation. + VPBasicBlock *Entry; /// VPIRBasicBlock wrapping the header of the original scalar loop. VPIRBasicBlock *ScalarHeader; @@ -3821,45 +3819,47 @@ class VPlan { DenseMap SCEVToExpansion; public: - /// Construct a VPlan with original preheader \p Preheader, trip count \p TC, - /// \p Entry to the plan and with \p ScalarHeader wrapping the original header - /// of the scalar loop. At the moment, \p Preheader and \p Entry need to be - /// disconnected, as the bypass blocks between them are not yet modeled in - /// VPlan. - VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry, - VPIRBasicBlock *ScalarHeader) - : VPlan(Preheader, Entry, ScalarHeader) { + /// Construct a VPlan with \p Entry entering the plan, trip count \p TC and + /// with \p ScalarHeader wrapping the original header of the scalar loop. + VPlan(VPBasicBlock *Entry, VPValue *TC, VPIRBasicBlock *ScalarHeader) + : VPlan(Entry, ScalarHeader) { TripCount = TC; } - /// Construct a VPlan with original preheader \p Preheader, \p Entry to - /// the plan and with \p ScalarHeader wrapping the original header of the - /// scalar loop. At the moment, \p Preheader and \p Entry need to be - /// disconnected, as the bypass blocks between them are not yet modeled in - /// VPlan. - VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry, - VPIRBasicBlock *ScalarHeader) - : Entry(Entry), Preheader(Preheader), ScalarHeader(ScalarHeader) { + /// Constructor variants that take disconnected preheader and entry blocks, + /// connecting them as part of construction. + /// FIXME: Only used to reduce the need of code changes during transition. + VPlan(VPBasicBlock *OriginalPreheader, VPValue *TC, + VPBasicBlock *EntryVectorPreHeader, VPIRBasicBlock *ScalarHeader); + VPlan(VPBasicBlock *OriginalPreheader, VPBasicBlock *EntryVectorPreHeader, + VPIRBasicBlock *ScalarHeader); + + /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader + /// wrapping the original header of the scalar loop. + VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader) + : Entry(Entry), ScalarHeader(ScalarHeader) { Entry->setPlan(this); - Preheader->setPlan(this); - assert(Preheader->getNumSuccessors() == 0 && - Preheader->getNumPredecessors() == 0 && - "preheader must be disconnected"); assert(ScalarHeader->getNumSuccessors() == 0 && "scalar header must be a leaf node"); } ~VPlan(); + void setEntry(VPBasicBlock *VPBB) { + Entry = VPBB; + VPBB->setPlan(this); + } + /// Create initial VPlan, having an "entry" VPBasicBlock (wrapping - /// original scalar pre-header ) which contains SCEV expansions that need - /// to happen before the CFG is modified; a VPBasicBlock for the vector - /// pre-header, followed by a region for the vector loop, followed by the - /// middle VPBasicBlock. If a check is needed to guard executing the scalar - /// epilogue loop, it will be added to the middle block, together with - /// VPBasicBlocks for the scalar preheader and exit blocks. - /// \p InductionTy is the type of the canonical induction and used for related - /// values, like the trip count expression. + /// original scalar pre-header) which contains SCEV expansions that need + /// to happen before the CFG is modified (when executing a VPlan for the + /// epilogue vector loop, the original entry needs to be replaced by a new + /// one); a VPBasicBlock for the vector pre-header, followed by a region for + /// the vector loop, followed by the middle VPBasicBlock. If a check is needed + /// to guard executing the scalar epilogue loop, it will be added to the + /// middle block, together with VPBasicBlocks for the scalar preheader and + /// exit blocks. \p InductionTy is the type of the canonical induction and + /// used for related values, like the trip count expression. static VPlanPtr createInitialVPlan(Type *InductionTy, PredicatedScalarEvolution &PSE, bool RequiresScalarEpilogueCheck, @@ -3884,26 +3884,22 @@ class VPlan { } /// Returns the VPRegionBlock of the vector loop. - VPRegionBlock *getVectorLoopRegion() { - return cast(getEntry()->getSingleSuccessor()); - } - const VPRegionBlock *getVectorLoopRegion() const { - return cast(getEntry()->getSingleSuccessor()); - } + VPRegionBlock *getVectorLoopRegion(); + const VPRegionBlock *getVectorLoopRegion() const; /// Returns the 'middle' block of the plan, that is the block that selects /// whether to execute the scalar tail loop or the exit block from the loop /// latch. const VPBasicBlock *getMiddleBlock() const { - return cast(getScalarPreheader()->getSinglePredecessor()); + return cast(getScalarPreheader()->getPredecessors().front()); } VPBasicBlock *getMiddleBlock() { - return cast(getScalarPreheader()->getSinglePredecessor()); + return cast(getScalarPreheader()->getPredecessors().front()); } /// Return the VPBasicBlock for the preheader of the scalar loop. VPBasicBlock *getScalarPreheader() const { - return cast(ScalarHeader->getSinglePredecessor()); + return cast(getScalarHeader()->getSinglePredecessor()); } /// Return the VPIRBasicBlock wrapping the header of the scalar loop. @@ -4039,8 +4035,10 @@ class VPlan { } /// \return The block corresponding to the original preheader. - VPBasicBlock *getPreheader() { return Preheader; } - const VPBasicBlock *getPreheader() const { return Preheader; } + /// FIXME: There's no separate preheader any longer and Entry now serves the + /// same purpose as the original preheader. Remove after transition. + VPBasicBlock *getPreheader() { return Entry; } + const VPBasicBlock *getPreheader() const { return Entry; } /// Clone the current VPlan, update all VPValues of the new VPlan and cloned /// recipes to refer to the clones, and return it. @@ -4190,8 +4188,6 @@ class VPBlockUtils { "Can't connect two block with different parents"); assert((SuccIdx != -1u || From->getNumSuccessors() < 2) && "Blocks can't have more than two successors."); - assert((PredIdx != -1u || To->getNumPredecessors() < 2) && - "Blocks can't have more than two predecessors."); if (SuccIdx == -1u) From->appendSuccessor(To); else diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 09e7851f41b1e29..02774d8e5c5fefb 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -633,14 +633,14 @@ Value *VPInstruction::generate(VPTransformState &State) { Builder.CreatePHI(IncomingFromOtherPreds->getType(), 2, Name); BasicBlock *VPlanPred = State.CFG - .VPBB2IRBB[cast(getParent()->getSinglePredecessor())]; + .VPBB2IRBB[cast(getParent()->getPredecessors()[0])]; NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred); // TODO: Predecessors are temporarily reversed to reduce test changes. // Remove it and update remaining tests after functional change landed. auto Predecessors = to_vector(predecessors(Builder.GetInsertBlock())); for (auto *OtherPred : reverse(Predecessors)) { - assert(OtherPred != VPlanPred && - "VPlan predecessors should not be connected yet"); + if (OtherPred == VPlanPred) + continue; NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred); } return NewPhi; @@ -3257,13 +3257,22 @@ void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent, void VPExpandSCEVRecipe::execute(VPTransformState &State) { assert(!State.Lane && "cannot be used in per-lane"); + if (State.ExpandedSCEVs.contains(Expr)) { + // SCEV Expr has already been expanded, result must already be set. At the + // moment we have to execute the entry block twice (once before skeleton + // creation to get expanded SCEVs used by the skeleton and once during + // regular VPlan execution). + State.Builder.SetInsertPoint(State.CFG.VPBB2IRBB[getParent()]); + assert(State.get(this, VPLane(0)) == State.ExpandedSCEVs[Expr] && + "Results must match"); + return; + } + const DataLayout &DL = State.CFG.PrevBB->getDataLayout(); SCEVExpander Exp(SE, DL, "induction"); Value *Res = Exp.expandCodeFor(Expr, Expr->getType(), &*State.Builder.GetInsertPoint()); - assert(!State.ExpandedSCEVs.contains(Expr) && - "Same SCEV expanded multiple times"); State.ExpandedSCEVs[Expr] = Res; State.set(this, Res, VPLane(0)); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index cdc541562525dbd..f7b9a676ae80890 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -377,7 +377,8 @@ static bool mergeBlocksIntoPredecessors(VPlan &Plan) { continue; auto *PredVPBB = dyn_cast_or_null(VPBB->getSinglePredecessor()); - if (!PredVPBB || PredVPBB->getNumSuccessors() != 1) + if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 || + isa(PredVPBB)) continue; WorkList.push_back(VPBB); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index ff6c9295ee2057a..89e372d6b46cfd2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -412,8 +412,6 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) { UnrollState Unroller(Plan, UF, Ctx); - Unroller.unrollBlock(Plan.getPreheader()); - // Iterate over all blocks in the plan starting from Entry, and unroll // recipes inside them. This includes the vector preheader and middle blocks, // which may set up or post-process per-part values. diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index 4621c28b051298e..e40af3e2e3d30a0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -34,7 +34,7 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, Expanded = Plan.getOrAddLiveIn(E->getValue()); else { Expanded = new VPExpandSCEVRecipe(Expr, SE); - Plan.getPreheader()->appendRecipe(Expanded->getDefiningRecipe()); + Plan.getEntry()->appendRecipe(Expanded->getDefiningRecipe()); } Plan.addSCEVExpansion(Expr, Expanded); return Expanded; diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 71c7d547ac7d917..be420a873bef526 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -185,7 +185,7 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) { RecipeNumbering[&R] = Cnt++; for (const VPRecipeBase &R : *VPBB) { - if (isa(&R) ^ isa(VPBB)) { + if (isa(&R) && !isa(VPBB)) { errs() << "VPIRInstructions "; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) R.dump(); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll index 0a7948d89a6b469..d81cfbf08ec9393 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll @@ -15,7 +15,7 @@ target triple = "aarch64-unknown-linux-gnu" ; VPLANS-EMPTY: ; VPLANS-NEXT: ir-bb: ; VPLANS-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 umax %n) -; VPLANS-NEXT: No successors +; VPLANS-NEXT: Successor(s): vector.ph ; VPLANS-EMPTY: ; VPLANS-NEXT: vector.ph: ; VPLANS-NEXT: EMIT vp<[[NEWTC:%[0-9]+]]> = TC > VF ? TC - VF : 0 vp<[[TC]]> diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll index c3f753af6fb53c7..1ee6083eb59a5a5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll @@ -13,6 +13,9 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%N> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll index 9739611a8b6e485..3ef99ff496a6870 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll @@ -16,6 +16,9 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Live-in [[VTC:.*]] = vector-trip-count ; CHECK-NEXT: Live-in [[OTC:.*]] = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -59,6 +62,9 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Live-in [[VTC:.*]] = vector-trip-count ; CHECK-NEXT: Live-in [[OTC:.*]] = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll index 050bc7c346bf263..8ac46fe7687d243 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll @@ -14,6 +14,9 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -56,6 +59,9 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -103,6 +109,9 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -145,6 +154,9 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -191,6 +203,9 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -233,6 +248,9 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll index 7a67f37c726622a..648f6e874abbe06 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll @@ -12,6 +12,9 @@ target triple = "arm64-apple-ios" ; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -54,6 +57,9 @@ target triple = "arm64-apple-ios" ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll index a0696b3204dbd4b..cd1d18aad8361d4 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll @@ -14,6 +14,9 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: Live-in ir<%N> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -77,6 +80,9 @@ define void @safe_dep(ptr %p) { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<512> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index fcc01e0ba2e2d1c..eb60c24393df993 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -61,7 +61,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %0 = zext i32 %n to i64 ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 %n to i64) -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -152,9 +152,40 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %0 = zext i32 %n to i64 ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 %n to i64) -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): ir-bb, ir-bb ; CHECK-EMPTY: -; CHECK-NEXT: vector.ph: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %3 = add nsw i64 %0, -1 +; CHECK-NEXT: IR %4 = add i32 %n, -1 +; CHECK-NEXT: IR %5 = trunc i64 %3 to i32 +; CHECK-NEXT: IR %mul = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 %5) +; CHECK-NEXT: IR %mul.result = extractvalue { i32, i1 } %mul, 0 +; CHECK-NEXT: IR %mul.overflow = extractvalue { i32, i1 } %mul, 1 +; CHECK-NEXT: IR %6 = sub i32 %4, %mul.result +; CHECK-NEXT: IR %7 = icmp ugt i32 %6, %4 +; CHECK-NEXT: IR %8 = or i1 %7, %mul.overflow +; CHECK-NEXT: IR %9 = icmp ugt i64 %3, 4294967295 +; CHECK-NEXT: IR %10 = or i1 %8, %9 +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %11 = call i64 @llvm.vscale.i64() +; CHECK-NEXT: IR %12 = mul i64 %11, 4 +; CHECK-NEXT: IR %13 = mul i64 %12, 4 +; CHECK-NEXT: IR %14 = sub i64 %B1, %A2 +; CHECK-NEXT: IR %diff.check = icmp ult i64 %14, %13 +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %15 = call i64 @llvm.vscale.i64() +; CHECK-NEXT: IR %16 = mul i64 %15, 4 +; CHECK-NEXT: IR %n.mod.vf = urem i64 %0, %16 +; CHECK-NEXT: IR %n.vec = sub i64 %0, %n.mod.vf +; CHECK-NEXT: IR %ind.end = sub i64 %0, %n.vec +; CHECK-NEXT: IR %.cast = trunc i64 %n.vec to i32 +; CHECK-NEXT: IR %ind.end3 = sub i32 %n, %.cast +; CHECK-NEXT: IR %17 = call i64 @llvm.vscale.i64() +; CHECK-NEXT: IR %18 = mul i64 %17, 4 ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { @@ -182,19 +213,19 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> ; CHECK-NEXT: Successor(s): ir-bb, ir-bb ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: No successors +; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = resume-phi ir<%ind.end>, ir<%0> -; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = resume-phi ir<%ind.end3>, ir<%n> +; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = resume-phi ir<%ind.end>, ir<%0> +; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]>.1 = resume-phi ir<%ind.end3>, ir<%n> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %indvars.iv = phi i64 [ %0, %scalar.ph ], [ %indvars.iv.next, %for.body ] (extra operand: vp<[[RESUME1]]> from ir-bb -; CHECK-NEXT: IR %i.0.in8 = phi i32 [ %n, %scalar.ph ], [ %i.0, %for.body ] (extra operand: vp<[[RESUME2]]>.1 from ir-bb +; CHECK-NEXT: IR %indvars.iv = phi i64 [ %0, %scalar.ph ], [ %indvars.iv.next, %for.body ] (extra operand: vp<[[RESUME_1]]> from ir-bb) +; CHECK-NEXT: IR %i.0.in8 = phi i32 [ %n, %scalar.ph ], [ %i.0, %for.body ] (extra operand: vp<[[RESUME_2]]>.1 from ir-bb) ; CHECK: IR %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: No successors -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: -; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK: LV: Loop does not require scalar epilogue ; @@ -277,7 +308,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %0 = zext i32 %n to i64 ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 %n to i64) -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -368,9 +399,40 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %0 = zext i32 %n to i64 ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 %n to i64) -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): ir-bb, ir-bb ; CHECK-EMPTY: -; CHECK-NEXT: vector.ph: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %3 = add nsw i64 %0, -1 +; CHECK-NEXT: IR %4 = add i32 %n, -1 +; CHECK-NEXT: IR %5 = trunc i64 %3 to i32 +; CHECK-NEXT: IR %mul = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 %5) +; CHECK-NEXT: IR %mul.result = extractvalue { i32, i1 } %mul, 0 +; CHECK-NEXT: IR %mul.overflow = extractvalue { i32, i1 } %mul, 1 +; CHECK-NEXT: IR %6 = sub i32 %4, %mul.result +; CHECK-NEXT: IR %7 = icmp ugt i32 %6, %4 +; CHECK-NEXT: IR %8 = or i1 %7, %mul.overflow +; CHECK-NEXT: IR %9 = icmp ugt i64 %3, 4294967295 +; CHECK-NEXT: IR %10 = or i1 %8, %9 +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %11 = call i64 @llvm.vscale.i64() +; CHECK-NEXT: IR %12 = mul i64 %11, 4 +; CHECK-NEXT: IR %13 = mul i64 %12, 4 +; CHECK-NEXT: IR %14 = sub i64 %B1, %A2 +; CHECK-NEXT: IR %diff.check = icmp ult i64 %14, %13 +; CHECK-NEXT: Successor(s): ir-bb, ir-bb +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %15 = call i64 @llvm.vscale.i64() +; CHECK-NEXT: IR %16 = mul i64 %15, 4 +; CHECK-NEXT: IR %n.mod.vf = urem i64 %0, %16 +; CHECK-NEXT: IR %n.vec = sub i64 %0, %n.mod.vf +; CHECK-NEXT: IR %ind.end = sub i64 %0, %n.vec +; CHECK-NEXT: IR %.cast = trunc i64 %n.vec to i32 +; CHECK-NEXT: IR %ind.end3 = sub i32 %n, %.cast +; CHECK-NEXT: IR %17 = call i64 @llvm.vscale.i64() +; CHECK-NEXT: IR %18 = mul i64 %17, 4 ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { @@ -398,19 +460,19 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> ; CHECK-NEXT: Successor(s): ir-bb, ir-bb ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: No successors +; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = resume-phi ir<%ind.end>, ir<%0> ; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = resume-phi ir<%ind.end3>, ir<%n> ; CHECK-NEXT: Successor(s): ir-bb ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %indvars.iv = phi i64 [ %0, %scalar.ph ], [ %indvars.iv.next, %for.body ] (extra operand: vp<[[RESUME1]]> from ir-bb -; CHECK-NEXT: IR %i.0.in8 = phi i32 [ %n, %scalar.ph ], [ %i.0, %for.body ] (extra operand: vp<[[RESUME2]]>.1 from ir-bb +; CHECK-NEXT: IR %indvars.iv = phi i64 [ %0, %scalar.ph ], [ %indvars.iv.next, %for.body ] (extra operand: vp<[[RESUME1]]> from ir-bb) +; CHECK-NEXT: IR %i.0.in8 = phi i32 [ %n, %scalar.ph ], [ %i.0, %for.body ] (extra operand: vp<[[RESUME2]]>.1 from ir-bb) ; CHECK: IR %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: No successors -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: -; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK: LV: Loop does not require scalar epilogue ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll index a474d926a6303f8..e7eb5778ffb930a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll @@ -29,7 +29,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count ; IF-EVL-OUTLOOP-NEXT: Live-in ir<%n> = original trip-count ; IF-EVL-OUTLOOP-EMPTY: -; IF-EVL-OUTLOOP: vector.ph: +; IF-EVL-OUTLOOP-NEXT: ir-bb: +; IF-EVL-OUTLOOP-NEXT: Successor(s): vector.ph +; IF-EVL-OUTLOOP-EMPTY: +; IF-EVL-OUTLOOP-NEXT: vector.ph: ; IF-EVL-OUTLOOP-NEXT: Successor(s): vector loop ; IF-EVL-OUTLOOP-EMPTY: ; IF-EVL-OUTLOOP-NEXT: vector loop: { diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll index 4983ea0f7a7386a..53c9fb0c604daaf 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll @@ -11,7 +11,18 @@ ; IF-EVL-NEXT: Live-in ir<[[VTC:%.+]]> = vector-trip-count ; IF-EVL-NEXT: Live-in ir<%N> = original trip-count - ; IF-EVL: vector.ph: + ; IF-EVL: ir-bb: + ; IF-EVL-NEXT: Successor(s): ir-bb, ir-bb + + ; IF-EVL: ir-bb: + ; IF-EVL-NEXT: IR %4 = call i64 @llvm.vscale.i64() + ; IF-EVL-NEXT: IR %5 = mul i64 %4, 4 + ; IF-EVL-NEXT: IR %6 = sub i64 %5, 1 + ; IF-EVL-NEXT: IR %n.rnd.up = add i64 %N, %6 + ; IF-EVL-NEXT: IR %n.mod.vf = urem i64 %n.rnd.up, %5 + ; IF-EVL-NEXT: IR %n.vec = sub i64 %n.rnd.up, %n.mod.vf + ; IF-EVL-NEXT: IR %7 = call i64 @llvm.vscale.i64() + ; IF-EVL-NEXT: IR %8 = mul i64 %7, 4 ; IF-EVL-NEXT: Successor(s): vector loop ; IF-EVL: vector loop: { diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll index d2d2063fd90583d..0eab97b0cc7351d 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll @@ -9,6 +9,9 @@ define void @test_chained_first_order_recurrences_1(ptr %ptr) { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1000> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -81,6 +84,9 @@ define void @test_chained_first_order_recurrences_3(ptr %ptr) { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1000> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -165,6 +171,9 @@ define i32 @test_chained_first_order_recurrences_4(ptr %base, i64 %x) { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<4098> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: WIDEN ir<%for.x.next> = mul ir<%x>, ir<2> ; CHECK-NEXT: Successor(s): vector loop @@ -237,6 +246,9 @@ define i32 @test_chained_first_order_recurrences_5_hoist_to_load(ptr %base) { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<4098> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 3d8403b1eb9c015..d59573e7f4678e4 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -15,6 +15,9 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: Live-in ir<20001> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -121,6 +124,9 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: Live-in ir<20001> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: WIDEN-CAST ir<%recur.next> = sext ir<%y> to i32 ; CHECK-NEXT: Successor(s): vector loop @@ -205,6 +211,9 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: Live-in ir<20001> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: WIDEN-CAST ir<%recur.next> = sext ir<%y> to i32 ; CHECK-NEXT: Successor(s): vector loop @@ -282,6 +291,9 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: Live-in ir<20001> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -397,7 +409,7 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 smax (1 + (sext i8 %y to i32))) -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: WIDEN-CAST ir<%recur.next> = sext ir<%y> to i32 @@ -487,6 +499,9 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: Live-in ir<3> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll index 717014743832746..ee87636eb031650 100644 --- a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll @@ -43,6 +43,9 @@ for.end: ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: Live-in ir<14> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll index 53a0a308c79a9c1..bb17580ac4d119f 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -11,7 +11,7 @@ ; DBG-EMPTY: ; DBG-NEXT: ir-bb: ; DBG-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1000 + (-1 * %start)) -; DBG-NEXT: No successors +; DBG-NEXT: Successor(s): vector.ph ; DBG-EMPTY: ; DBG-NEXT: vector.ph: ; DBG-NEXT: Successor(s): vector loop @@ -72,6 +72,9 @@ declare i32 @llvm.smin.i32(i32, i32) ; DBG-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; DBG-NEXT: Live-in ir<1000> = original trip-count ; DBG-EMPTY: +; DBG-NEXT: ir-bb: +; DBG-NEXT: Successor(s): vector.ph +; DBG-EMPTY: ; DBG-NEXT: vector.ph: ; DBG-NEXT: Successor(s): vector loop ; DBG-EMPTY: @@ -192,7 +195,7 @@ exit: ; DBG-EMPTY: ; DBG-NEXT: ir-bb: ; DBG-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 (1 smax %n) to i64) -; DBG-NEXT: No successors +; DBG-NEXT: Successor(s): vector.ph ; DBG-EMPTY: ; DBG-NEXT: vector.ph: ; DBG-NEXT: SCALAR-CAST vp<[[CAST:%.+]]> = trunc ir<1> to i32 diff --git a/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll b/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll index 368c766e6b3c703..730dbfe84070abe 100644 --- a/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll @@ -15,7 +15,7 @@ define i64 @multi_exiting_to_different_exits_live_in_exit_values() { ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %src = alloca [128 x i32], align 4 ; CHECK-NEXT: IR call void @init(ptr %src) -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -99,7 +99,7 @@ define i64 @multi_exiting_to_same_exit_live_in_exit_values() { ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %src = alloca [128 x i32], align 4 ; CHECK-NEXT: IR call void @init(ptr %src) -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -176,7 +176,7 @@ define i64 @multi_exiting_to_same_exit_live_in_exit_values_2() { ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %src = alloca [128 x i32], align 4 ; CHECK-NEXT: IR call void @init(ptr %src) -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop diff --git a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll index 68dd47537fdfdfa..50755f8f05e35a8 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll @@ -14,8 +14,9 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw ; CHECK-NEXT: compound=true ; CHECK-NEXT: N0 [label = ; CHECK-NEXT: "ir-bb\:\l" + -; CHECK-NEXT: "No successors\l" +; CHECK-NEXT: "Successor(s): vector.ph\l" ; CHECK-NEXT: ] +; CHECK-NEXT: N0 -> N1 [ label=""] ; CHECK-NEXT: N1 [label = ; CHECK-NEXT: "vector.ph:\l" + ; CHECK-NEXT: "Successor(s): vector loop\l" diff --git a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll index 323a638fd635495..c342d2f81e979bf 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll @@ -9,6 +9,9 @@ define void @iv_no_binary_op_in_descriptor(i1 %c, ptr %dst) { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1000> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll index f0ac88c75e9ecf1..484e1ea8de0d224 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll @@ -9,9 +9,12 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV ((-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): ir-bb, ir-bb ; CHECK-EMPTY: -; CHECK-NEXT: vector.ph: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %n.mod.vf = urem i64 %0, 2 +; CHECK-NEXT: IR %n.vec = sub i64 %0, %n.mod.vf +; CHECK-NEXT: IR %ind.end = getelementptr i8, ptr %start, i64 %n.vec ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { @@ -87,6 +90,9 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]> ; CHECK-NEXT: Successor(s): ir-bb, ir-bb ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: No successors +; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[RESUME:%.+]]> = resume-phi ir<%ind.end>, ir<%start> ; CHECK-NEXT: Successor(s): ir-bb @@ -95,9 +101,6 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %scalar.ph ], [ %ptr.iv.next, %loop.latch ] (extra operand: vp<[[RESUME]]> from ir-bb) ; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv, align 1 ; CHECK-NEXT: No successors -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: -; CHECK-NEXT: No successors ; CHECK-NEXT: } ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll index 6bd31565e9fb9e7..f07d1af47af02e7 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll @@ -16,7 +16,7 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %and = and i64 %N, 15 ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i4 (trunc i64 %N to i4) to i64) -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -64,9 +64,13 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: IR %and = and i64 %N, 15 ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i4 (trunc i64 %N to i4) to i64) -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): ir-bb, ir-bb ; CHECK-EMPTY: -; CHECK-NEXT: vector.ph: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: IR %n.mod.vf = urem i64 %and, 16 +; CHECK-NEXT: IR %n.vec = sub i64 %and, %n.mod.vf +; CHECK-NEXT: IR %ind.end = sub i64 %and, %n.vec +; CHECK-NEXT: IR %ind.end1 = getelementptr i8, ptr %A, i64 %n.vec ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { @@ -95,6 +99,9 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; CHECK-NEXT: EMIT branch-on-cond vp<[[C]]> ; CHECK-NEXT: Successor(s): ir-bb, ir-bb ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: No successors +; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = resume-phi ir<%ind.end>, ir<%and> ; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = resume-phi ir<%ind.end1>, ir<%A> @@ -105,9 +112,6 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) { ; CHECK-NEXT: IR %p.src = phi ptr [ %A, %scalar.ph ], [ %p.src.next, %loop ] (extra operand: vp<[[RESUME2]]>.1 from ir-bb) ; CHECK: IR %cmp = icmp eq i64 %iv.next, 0 ; CHECK-NEXT: No successors -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: -; CHECK-NEXT: No successors ; CHECK-NEXT: } ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll index 5e3116eae854830..2adeb5920cb5b2b 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll @@ -11,6 +11,9 @@ define void @foo(i64 %n) { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<8> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 32e55bff94b3e50..c526c53dbea067d 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -13,6 +13,9 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -77,6 +80,9 @@ define void @print_widen_gep_and_select(i64 %n, ptr noalias %y, ptr noalias %x, ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -145,6 +151,9 @@ define float @print_reduction(i64 %n, ptr noalias %y) { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -208,6 +217,9 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -275,7 +287,7 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) { ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 smax %n) -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -365,6 +377,9 @@ define void @print_interleave_groups(i32 %C, i32 %D) { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<256> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -446,6 +461,9 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -516,6 +534,9 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<128> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -624,7 +645,7 @@ define void @print_expand_scev(i64 %y, ptr %ptr) { ; CHECK-NEXT: IR %inc = add i64 %div, 1 ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + ((15 + (%y /u 492802768830814060)) /u (1 + (%y /u 492802768830814060)))) ; CHECK-NEXT: EMIT vp<[[EXP_SCEV:%.+]]> = EXPAND SCEV (1 + (%y /u 492802768830814060)) -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -691,6 +712,9 @@ define i32 @print_exit_value(ptr %ptr, i32 %off) { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1000> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -752,6 +776,9 @@ define void @print_fast_math_flags(i64 %n, ptr noalias %y, ptr noalias %x, ptr % ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -818,6 +845,9 @@ define void @print_exact_flags(i64 %n, ptr noalias %x) { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -882,6 +912,9 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -977,6 +1010,9 @@ define void @print_disjoint_flags(i64 %n, ptr noalias %x) { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -1041,6 +1077,9 @@ define void @zext_nneg(ptr noalias %p, ptr noalias %p1) { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1000> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -1082,6 +1121,9 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<1000> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll index 823b1cef93ce742..a939b1e923a914f 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll @@ -12,6 +12,9 @@ define void @sink_with_sideeffects(i1 %c, ptr %ptr) { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: ir<0> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index 330a6b1715c78e5..94aefdee82b148b 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -19,7 +19,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -88,7 +88,7 @@ exit: ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -173,7 +173,7 @@ exit: ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -258,6 +258,9 @@ define void @uniform_gep(i64 %k, ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: Live-in ir<11> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: CLONE ir<%gep.A.uniform> = getelementptr inbounds ir<%A>, ir<0> ; CHECK-NEXT: Successor(s): vector loop @@ -332,7 +335,7 @@ define void @pred_cfg1(i32 %k, i32 %j) { ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -430,7 +433,7 @@ define void @pred_cfg2(i32 %k, i32 %j) { ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -537,7 +540,7 @@ define void @pred_cfg3(i32 %k, i32 %j) { ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -644,7 +647,7 @@ define void @merge_3_replicate_region(i32 %k, i32 %j) { ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -750,7 +753,7 @@ define void @update_2_uses_in_same_recipe_in_merged_block(i32 %k) { ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -816,7 +819,7 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) { ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -898,6 +901,9 @@ define void @update_multiple_users(ptr noalias %src, ptr noalias %dst, i1 %c) { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<999> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -962,6 +968,9 @@ define void @sinking_requires_duplication(ptr %addr) { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in ir<201> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -1033,6 +1042,9 @@ define void @merge_with_dead_gep_between_regions(i32 %n, ptr noalias %src, ptr n ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -1119,7 +1131,7 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) { ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: ; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV ((-1 * (ptrtoint ptr %end to i64)) + (ptrtoint ptr %start to i64)) -; CHECK-NEXT: No successors +; CHECK-NEXT: Successor(s): vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop diff --git a/llvm/test/Transforms/LoopVectorize/vplan-unused-interleave-group.ll b/llvm/test/Transforms/LoopVectorize/vplan-unused-interleave-group.ll index 27d81de260d3b94..9778ef6853a70f6 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-unused-interleave-group.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-unused-interleave-group.ll @@ -13,6 +13,9 @@ define void @test_unused_interleave(ptr %src, i32 %length) { ; CHECK-NEXT: Live-in vp<%1> = vector-trip-count ; CHECK-NEXT: Live-in ir<%length> = original trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: Successor(s): vector.ph +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll index 4423b89d815656d..7097171ab78c5f0 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll @@ -18,12 +18,8 @@ define i32 @read_only_loop_with_runtime_check(ptr noundef %array, i32 noundef %c ; CHECK: for.body.preheader10: ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER13:%.*]], label [[VECTOR_PH:%.*]] -; CHECK: for.body.preheader13: -; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER10]] ], [ [[N_VEC:%.*]], [[MIDDLE_BLOCK:%.*]] ] -; CHECK-NEXT: [[SUM_07_PH:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER10]] ], [ [[TMP7:%.*]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC]] = and i64 [[TMP0]], 4294967288 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 4294967288 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -37,12 +33,16 @@ define i32 @read_only_loop_with_runtime_check(ptr noundef %array, i32 noundef %c ; CHECK-NEXT: [[TMP5]] = add <4 x i32> [[WIDE_LOAD12]], [[VEC_PHI11]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP5]], [[TMP4]] -; CHECK-NEXT: [[TMP7]] = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) +; CHECK-NEXT: [[TMP7:%.*]] = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER13]] +; CHECK: for.body.preheader13: +; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER10]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SUM_07_PH:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER10]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] @@ -133,12 +133,8 @@ define dso_local noundef i32 @sum_prefix_with_sum(ptr %s.coerce0, i64 %s.coerce1 ; CHECK: for.body.preheader8: ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER11:%.*]], label [[VECTOR_PH:%.*]] -; CHECK: for.body.preheader11: -; CHECK-NEXT: [[I_07_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER8]] ], [ [[N_VEC:%.*]], [[SPAN_CHECKED_ACCESS_EXIT:%.*]] ] -; CHECK-NEXT: [[RET_0_LCSSA:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER8]] ], [ [[ADD:%.*]], [[SPAN_CHECKED_ACCESS_EXIT]] ] -; CHECK-NEXT: br label [[FOR_BODY1:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC]] = and i64 [[N]], -8 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -8 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_BODY]] ] @@ -152,12 +148,16 @@ define dso_local noundef i32 @sum_prefix_with_sum(ptr %s.coerce0, i64 %s.coerce1 ; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[WIDE_LOAD10]], [[VEC_PHI9]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[SPAN_CHECKED_ACCESS_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[SPAN_CHECKED_ACCESS_EXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]] -; CHECK-NEXT: [[ADD]] = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) +; CHECK-NEXT: [[ADD:%.*]] = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER11]] +; CHECK: for.body.preheader11: +; CHECK-NEXT: [[I_07_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER8]] ], [ [[N_VEC]], [[SPAN_CHECKED_ACCESS_EXIT]] ] +; CHECK-NEXT: [[RET_0_LCSSA:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER8]] ], [ [[ADD]], [[SPAN_CHECKED_ACCESS_EXIT]] ] +; CHECK-NEXT: br label [[FOR_BODY1:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[RET_0_LCSSA1:%.*]] = phi i32 [ 0, [[ENTRY1:%.*]] ], [ [[ADD]], [[SPAN_CHECKED_ACCESS_EXIT]] ], [ [[ADD1:%.*]], [[FOR_BODY1]] ] ; CHECK-NEXT: ret i32 [[RET_0_LCSSA1]] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll index 63e85731e8c7007..8bc40cfc5cc8b85 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll @@ -53,9 +53,6 @@ define dso_local void @_Z7computeRSt6vectorIiSaIiEEy(ptr noundef nonnull align 8 ; O2-NEXT: br i1 [[CMP24_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4_PREHEADER:%.*]] ; O2: for.body4.preheader: ; O2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_PREHEADER9:%.*]], label [[VECTOR_BODY:%.*]] -; O2: for.body4.preheader9: -; O2-NEXT: [[J_05_PH:%.*]] = phi i64 [ 0, [[FOR_BODY4_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK:%.*]] ] -; O2-NEXT: br label [[FOR_BODY4:%.*]] ; O2: vector.body: ; O2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_BODY4_PREHEADER]] ] ; O2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[INDEX]] @@ -68,9 +65,12 @@ define dso_local void @_Z7computeRSt6vectorIiSaIiEEy(ptr noundef nonnull align 8 ; O2-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4, !tbaa [[TBAA0]] ; O2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; O2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; O2-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; O2-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; O2: middle.block: ; O2-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4_PREHEADER9]] +; O2: for.body4.preheader9: +; O2-NEXT: [[J_05_PH:%.*]] = phi i64 [ 0, [[FOR_BODY4_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; O2-NEXT: br label [[FOR_BODY4:%.*]] ; O2: for.cond.cleanup: ; O2-NEXT: ret void ; O2: for.cond.cleanup3: diff --git a/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll b/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll index d79ffb0149ff87c..1e062041b128693 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll @@ -17,11 +17,8 @@ define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %f ; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NFACE]], 4 ; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_BODY_PREHEADER14:.*]], label %[[VECTOR_PH:.*]] -; CHECK: [[FOR_BODY_PREHEADER14]]: -; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[UNROLL_ITER:%.*]], %[[MIDDLE_BLOCK:.*]] ] -; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[UNROLL_ITER]] = and i64 [[TMP0]], 2147483644 +; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP0]], 2147483644 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -40,10 +37,13 @@ define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %f ; CHECK-NEXT: tail call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP8]], <4 x ptr> [[TMP4]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV_EPIL]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[UNROLL_ITER]] -; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UNROLL_ITER]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY_PREHEADER14]] +; CHECK: [[FOR_BODY_PREHEADER14]]: +; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[UNROLL_ITER]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_COND_CLEANUP]]: ; CHECK-NEXT: ret void ; CHECK: [[FOR_BODY]]: diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp index 93277eed8be129c..1b362d1d26bdd34 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp @@ -107,8 +107,9 @@ edge [fontname=Courier, fontsize=30] compound=true N0 [label = "ir-bb\:\l" + - "No successors\l" + "Successor(s): vector.ph\l" ] + N0 -> N1 [ label=""] N1 [label = "vector.ph:\l" + "Successor(s): vector loop\l" diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index 3179cfc676ab67b..bc8bcc3447ea0ab 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -747,8 +747,9 @@ compound=true N0 [label = "preheader:\l" + " EMIT vp\<%1\> = add\l" + - "No successors\l" + "Successor(s): bb1\l" ] + N0 -> N1 [ label=""] N1 [label = "bb1:\l" + " EMIT vp\<%2\> = add\l" + @@ -840,7 +841,7 @@ vp<%1> = original trip-count preheader: EMIT vp<%1> = sub -No successors +Successor(s): bb1 bb1: EMIT vp<%2> = add @@ -864,7 +865,7 @@ vp<%1> = original trip-count preheader: EMIT vp<%1> = sub -No successors +Successor(s): bb1 bb1: EMIT vp<%2> = add @@ -888,7 +889,7 @@ vp<%1> = original trip-count preheader: EMIT vp<%1> = sub -No successors +Successor(s): bb1 bb1: EMIT vp<%2> = add diff --git a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp index edb3f8a2952942d..bc4f3943447fde5 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp @@ -227,7 +227,6 @@ TEST(VPVerifierTest, BlockOutsideRegionWithParent) { VPRegionBlock *R1 = new VPRegionBlock(VPBB2, VPBB2, "R1"); VPBlockUtils::connectBlocks(VPBB1, R1); - VPBB1->setParent(R1); auto TC = std::make_unique(); LLVMContext C; @@ -235,6 +234,7 @@ TEST(VPVerifierTest, BlockOutsideRegionWithParent) { VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader); VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB); VPlan Plan(VPPH, &*TC, VPBB1, ScalarHeaderVPBB); + VPBB1->setParent(R1); #if GTEST_HAS_STREAM_REDIRECTION ::testing::internal::CaptureStderr();