From 6f538f6a2d3224efda985e9eb09012fa4275ea92 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Fri, 14 Jun 2024 17:41:47 +0000 Subject: [PATCH] Revert "Recommit "[VPlan] First step towards VPlan cost modeling. (#92555)"" This reverts commit 90fd99c0795711e1cf762a02b29b0a702f86a264. This reverts commit 43e6f46936e177e47de6627a74b047ba27561b44. Causes crashes, see comments on https://github.com/llvm/llvm-project/pull/92555. --- .../Vectorize/LoopVectorizationPlanner.h | 17 +- .../Transforms/Vectorize/LoopVectorize.cpp | 236 ++---------------- llvm/lib/Transforms/Vectorize/VPlan.cpp | 86 ------- llvm/lib/Transforms/Vectorize/VPlan.h | 71 +----- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 35 --- .../Transforms/Vectorize/VPlanTransforms.cpp | 5 - llvm/lib/Transforms/Vectorize/VPlanValue.h | 3 +- .../RISCV/riscv-vector-reverse.ll | 2 - 8 files changed, 27 insertions(+), 428 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 6011e16076220..c03c278fcebe7 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -344,16 +344,6 @@ class LoopVectorizationPlanner { /// A builder used to construct the current plan. VPBuilder Builder; - /// Computes the cost of \p Plan for vectorization factor \p VF. - /// - /// The current implementation requires access to the - /// LoopVectorizationLegality to handle inductions and reductions, which is - /// why it is kept separate from the VPlan-only cost infrastructure. - /// - /// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has - /// been retired. - InstructionCost cost(VPlan &Plan, ElementCount VF) const; - public: LoopVectorizationPlanner( Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, @@ -375,9 +365,6 @@ class LoopVectorizationPlanner { /// Return the best VPlan for \p VF. VPlan &getBestPlanFor(ElementCount VF) const; - /// Return the most profitable plan and fix its VF to the most profitable one. - VPlan &getBestPlan() const; - /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan /// according to the best selected \p VF and \p UF. /// @@ -456,9 +443,7 @@ class LoopVectorizationPlanner { ElementCount MinVF); /// \return The most profitable vectorization factor and the cost of that VF. - /// This method checks every VF in \p CandidateVFs. This is now only used to - /// verify the decisions by the new VPlan-based cost-model and will be retired - /// once the VPlan-based cost-model is stabilized. + /// This method checks every VF in \p CandidateVFs. VectorizationFactor selectVectorizationFactor(const ElementCountSet &CandidateVFs); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 9fc068a068926..37b8023e1fcf2 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -290,7 +290,7 @@ static cl::opt ForceTargetMaxVectorInterleaveFactor( cl::desc("A flag that overrides the target's max interleave factor for " "vectorized loops.")); -cl::opt ForceTargetInstructionCost( +static cl::opt ForceTargetInstructionCost( "force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for " "an instruction to a single constant value. Mostly " @@ -412,6 +412,14 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL) { return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty); } +/// A helper function that returns the reciprocal of the block probability of +/// predicated blocks. If we return X, we are assuming the predicated block +/// will execute once for every X iterations of the loop header. +/// +/// TODO: We should use actual block probability here, if available. Currently, +/// we always assume predicated blocks have a 50% chance of executing. +static unsigned getReciprocalPredBlockProb() { return 2; } + /// Returns "best known" trip count for the specified loop \p L as defined by /// the following procedure: /// 1) Returns exact trip count if it is known. @@ -1613,16 +1621,6 @@ class LoopVectorizationCostModel { /// \p VF is the vectorization factor chosen for the original loop. bool isEpilogueVectorizationProfitable(const ElementCount VF) const; - /// Return the cost of instructions in an inloop reduction pattern, if I is - /// part of that pattern. - std::optional - getReductionPatternCost(Instruction *I, ElementCount VF, Type *VectorTy, - TTI::TargetCostKind CostKind) const; - - /// Returns the execution time cost of an instruction for a given vector - /// width. Vector width of one means scalar. - VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF); - private: unsigned NumPredStores = 0; @@ -1648,11 +1646,21 @@ class LoopVectorizationCostModel { /// of elements. ElementCount getMaxLegalScalableVF(unsigned MaxSafeElements); + /// Returns the execution time cost of an instruction for a given vector + /// width. Vector width of one means scalar. + VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF); + /// The cost-computation logic from getInstructionCost which provides /// the vector type as an output parameter. InstructionCost getInstructionCost(Instruction *I, ElementCount VF, Type *&VectorTy); + /// Return the cost of instructions in an inloop reduction pattern, if I is + /// part of that pattern. + std::optional + getReductionPatternCost(Instruction *I, ElementCount VF, Type *VectorTy, + TTI::TargetCostKind CostKind) const; + /// Calculate vectorization cost of memory instruction \p I. InstructionCost getMemoryInstructionCost(Instruction *I, ElementCount VF); @@ -7280,10 +7288,7 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { if (!MaxFactors.hasVector()) return VectorizationFactor::Disabled(); - // Select the optimal vectorization factor according to the legacy cost-model. - // This is now only used to verify the decisions by the new VPlan-based - // cost-model and will be retired once the VPlan-based cost-model is - // stabilized. + // Select the optimal vectorization factor. VectorizationFactor VF = selectVectorizationFactor(VFCandidates); assert((VF.Width.isScalar() || VF.ScalarCost > 0) && "when vectorizing, the scalar cost must be non-zero."); if (!hasPlanWithVF(VF.Width)) { @@ -7294,196 +7299,6 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { return VF; } -InstructionCost VPCostContext::getLegacyCost(Instruction *UI, - ElementCount VF) const { - return CM.getInstructionCost(UI, VF).first; -} - -bool VPCostContext::skipCostComputation(Instruction *UI, bool IsVector) const { - return (IsVector && CM.VecValuesToIgnore.contains(UI)) || - SkipCostComputation.contains(UI); -} - -InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan, - ElementCount VF) const { - InstructionCost Cost = 0; - LLVMContext &LLVMCtx = OrigLoop->getHeader()->getContext(); - VPCostContext CostCtx(CM.TTI, Legal->getWidestInductionType(), LLVMCtx, CM); - - // Cost modeling for inductions is inaccurate in the legacy cost model - // compared to the recipes that are generated. To match here initially during - // VPlan cost model bring up directly use the induction costs from the legacy - // cost model. Note that we do this as pre-processing; the VPlan may not have - // any recipes associated with the original induction increment instruction - // and may replace truncates with VPWidenIntOrFpInductionRecipe. We precompute - // the cost of both induction increment instructions that are represented by - // recipes and those that are not, to avoid distinguishing between them here, - // and skip all recipes that represent induction increments (the former case) - // later on, if they exist, to avoid counting them twice. Similarly we - // pre-compute the cost of any optimized truncates. - // TODO: Switch to more accurate costing based on VPlan. - for (const auto &[IV, IndDesc] : Legal->getInductionVars()) { - Instruction *IVInc = cast( - IV->getIncomingValueForBlock(OrigLoop->getLoopLatch())); - if (CostCtx.SkipCostComputation.insert(IVInc).second) { - InstructionCost InductionCost = CostCtx.getLegacyCost(IVInc, VF); - LLVM_DEBUG({ - dbgs() << "Cost of " << InductionCost << " for VF " << VF - << ":\n induction increment " << *IVInc << "\n"; - IVInc->dump(); - }); - Cost += InductionCost; - } - for (User *U : IV->users()) { - auto *CI = cast(U); - if (!CostCtx.CM.isOptimizableIVTruncate(CI, VF)) - continue; - assert(!CostCtx.SkipCostComputation.contains(CI) && - "Same cast for multiple inductions?"); - CostCtx.SkipCostComputation.insert(CI); - InstructionCost CastCost = CostCtx.getLegacyCost(CI, VF); - LLVM_DEBUG({ - dbgs() << "Cost of " << CastCost << " for VF " << VF - << ":\n induction cast " << *CI << "\n"; - CI->dump(); - }); - Cost += CastCost; - } - } - - /// Compute the cost of all exiting conditions of the loop using the legacy - /// cost model. This is to match the legacy behavior, which adds the cost of - /// all exit conditions. Note that this over-estimates the cost, as there will - /// be a single condition to control the vector loop. - SmallVector Exiting; - CM.TheLoop->getExitingBlocks(Exiting); - SetVector ExitInstrs; - // Collect all exit conditions. - for (BasicBlock *EB : Exiting) { - auto *Term = dyn_cast(EB->getTerminator()); - if (!Term) - continue; - if (auto *CondI = dyn_cast(Term->getOperand(0))) { - ExitInstrs.insert(CondI); - } - } - // Compute the cost of all instructions only feeding the exit conditions. - for (unsigned I = 0; I != ExitInstrs.size(); ++I) { - Instruction *CondI = ExitInstrs[I]; - if (!OrigLoop->contains(CondI) || - !CostCtx.SkipCostComputation.insert(CondI).second) - continue; - Cost += CostCtx.getLegacyCost(CondI, VF); - for (Value *Op : CondI->operands()) { - auto *OpI = dyn_cast(Op); - if (!OpI || any_of(OpI->users(), [&ExitInstrs](User *U) { - return !ExitInstrs.contains(cast(U)); - })) - continue; - ExitInstrs.insert(OpI); - } - } - - // The legacy cost model has special logic to compute the cost of in-loop - // reductions, which may be smaller than the sum of all instructions involved - // in the reduction. For AnyOf reductions, VPlan codegen may remove the select - // which the legacy cost model uses to assign cost. Pre-compute their costs - // for now. - // TODO: Switch to costing based on VPlan once the logic has been ported. - for (const auto &[RedPhi, RdxDesc] : Legal->getReductionVars()) { - if (!CM.isInLoopReduction(RedPhi) && - !RecurrenceDescriptor::isAnyOfRecurrenceKind( - RdxDesc.getRecurrenceKind())) - continue; - - // AnyOf reduction codegen may remove the select. To match the legacy cost - // model, pre-compute the cost for AnyOf reductions here. - if (RecurrenceDescriptor::isAnyOfRecurrenceKind( - RdxDesc.getRecurrenceKind())) { - auto *Select = cast(*find_if( - RedPhi->users(), [](User *U) { return isa(U); })); - assert(!CostCtx.SkipCostComputation.contains(Select) && - "reduction op visited multiple times"); - CostCtx.SkipCostComputation.insert(Select); - auto ReductionCost = CostCtx.getLegacyCost(Select, VF); - LLVM_DEBUG(dbgs() << "Cost of " << ReductionCost << " for VF " << VF - << ":\n any-of reduction " << *Select << "\n"); - Cost += ReductionCost; - continue; - } - - const auto &ChainOps = RdxDesc.getReductionOpChain(RedPhi, OrigLoop); - SetVector ChainOpsAndOperands(ChainOps.begin(), - ChainOps.end()); - // Also include the operands of instructions in the chain, as the cost-model - // may mark extends as free. - for (auto *ChainOp : ChainOps) { - for (Value *Op : ChainOp->operands()) { - if (auto *I = dyn_cast(Op)) - ChainOpsAndOperands.insert(I); - } - } - - // Pre-compute the cost for I, if it has a reduction pattern cost. - for (Instruction *I : ChainOpsAndOperands) { - auto ReductionCost = CM.getReductionPatternCost( - I, VF, ToVectorTy(I->getType(), VF), TTI::TCK_RecipThroughput); - if (!ReductionCost) - continue; - - assert(!CostCtx.SkipCostComputation.contains(I) && - "reduction op visited multiple times"); - CostCtx.SkipCostComputation.insert(I); - LLVM_DEBUG(dbgs() << "Cost of " << ReductionCost << " for VF " << VF - << ":\n in-loop reduction " << *I << "\n"); - Cost += *ReductionCost; - } - } - - // Now compute and add the VPlan-based cost. - Cost += Plan.cost(VF, CostCtx); - LLVM_DEBUG(dbgs() << "Cost for VF " << VF << ": " << Cost << "\n"); - return Cost; -} - -VPlan &LoopVectorizationPlanner::getBestPlan() const { - // If there is a single VPlan with a single VF, return it directly. - VPlan &FirstPlan = *VPlans[0]; - if (VPlans.size() == 1 && size(FirstPlan.vectorFactors()) == 1) - return FirstPlan; - - VPlan *BestPlan = &FirstPlan; - ElementCount ScalarVF = ElementCount::getFixed(1); - assert(hasPlanWithVF(ScalarVF) && - "More than a single plan/VF w/o any plan having scalar VF"); - - InstructionCost ScalarCost = cost(getBestPlanFor(ScalarVF), ScalarVF); - VectorizationFactor BestFactor(ScalarVF, ScalarCost, ScalarCost); - - bool ForceVectorization = Hints.getForce() == LoopVectorizeHints::FK_Enabled; - if (ForceVectorization) { - // Ignore scalar width, because the user explicitly wants vectorization. - // Initialize cost to max so that VF = 2 is, at least, chosen during cost - // evaluation. - BestFactor.Cost = InstructionCost::getMax(); - } - - for (auto &P : VPlans) { - for (ElementCount VF : P->vectorFactors()) { - if (VF.isScalar()) - continue; - InstructionCost Cost = cost(*P, VF); - VectorizationFactor CurrentFactor(VF, Cost, ScalarCost); - if (isMoreProfitable(CurrentFactor, BestFactor)) { - BestFactor = CurrentFactor; - BestPlan = &*P; - } - } - } - BestPlan->setVF(BestFactor.Width); - return *BestPlan; -} - VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const { assert(count_if(VPlans, [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) == @@ -10342,15 +10157,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { VF.MinProfitableTripCount, IC, &LVL, &CM, BFI, PSI, Checks); - VPlan &BestPlan = LVP.getBestPlan(); - assert(size(BestPlan.vectorFactors()) == 1 && - "Plan should have a single VF"); - ElementCount Width = *BestPlan.vectorFactors().begin(); - LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << Width - << "\n"); - assert(VF.Width == Width && - "VPlan cost model and legacy cost model disagreed"); - LVP.executePlan(Width, IC, BestPlan, LB, DT, false); + VPlan &BestPlan = LVP.getBestPlanFor(VF.Width); + LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false); ++LoopsVectorized; // Add metadata to disable runtime unrolling a scalar loop when there diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index ad6a718320830..f17be451e6846 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -747,64 +747,6 @@ void VPRegionBlock::execute(VPTransformState *State) { State->Instance.reset(); } -InstructionCost VPBasicBlock::cost(ElementCount VF, VPCostContext &Ctx) { - InstructionCost Cost = 0; - for (VPRecipeBase &R : Recipes) - Cost += R.cost(VF, Ctx); - return Cost; -} - -InstructionCost VPRegionBlock::cost(ElementCount VF, VPCostContext &Ctx) { - if (!isReplicator()) { - InstructionCost Cost = 0; - for (VPBlockBase *Block : vp_depth_first_shallow(getEntry())) - Cost += Block->cost(VF, Ctx); - return Cost; - } - - // Compute the cost of a replicate region. Replicating isn't supported for - // scalable vectors, return an invalid cost for them. - // TODO: Discard scalable VPlans with replicate recipes earlier after - // construction. - if (VF.isScalable()) - return InstructionCost::getInvalid(); - - // First compute the cost of the conditionally executed recipes, followed by - // account for the branching cost, except if the mask is a header mask or - // uniform condition. - using namespace llvm::VPlanPatternMatch; - VPBasicBlock *Then = cast(getEntry()->getSuccessors()[0]); - InstructionCost ThenCost = Then->cost(VF, Ctx); - - // Note the cost estimates below closely match the current legacy cost model. - auto *BOM = cast(&getEntryBasicBlock()->front()); - VPValue *Cond = BOM->getOperand(0); - - // Check if Cond is a uniform compare or a header mask and don't account for - // branching costs. A uniform condition corresponding to a single branch per - // VF, and the header mask will always be true except in the last iteration. - if (vputils::isUniformBoolean(Cond) || - vputils::isHeaderMask(Cond, *getPlan())) - return ThenCost; - - // For the scalar case, we may not always execute the original predicated - // block, Thus, scale the block's cost by the probability of executing it. - if (VF.isScalar()) - return ThenCost / getReciprocalPredBlockProb(); - - // Add the cost for branches around scalarized and predicated blocks. - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; - - auto *Vec_i1Ty = VectorType::get(IntegerType::getInt1Ty(Ctx.LLVMCtx), VF); - auto FixedVF = VF.getFixedValue(); // Known to be non scalable. - InstructionCost Cost = ThenCost; - Cost += Ctx.TTI.getScalarizationOverhead(Vec_i1Ty, APInt::getAllOnes(FixedVF), - /*Insert*/ false, /*Extract*/ true, - CostKind); - Cost += Ctx.TTI.getCFInstrCost(Instruction::Br, CostKind) * FixedVF; - return Cost; -} - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPRegionBlock::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { @@ -978,12 +920,6 @@ void VPlan::execute(VPTransformState *State) { "DT not preserved correctly"); } -InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) { - // For now only return the cost of the vector loop region, ignoring any other - // blocks, like the preheader or middle blocks. - return getVectorLoopRegion()->cost(VF, Ctx); -} - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPlan::printLiveIns(raw_ostream &O) const { VPSlotTracker SlotTracker(this); @@ -1518,25 +1454,3 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, Plan.addSCEVExpansion(Expr, Expanded); return Expanded; } - -bool vputils::isUniformBoolean(VPValue *Cond) { - if (match(Cond, m_Not(m_VPValue()))) - Cond = Cond->getDefiningRecipe()->getOperand(0); - auto *R = Cond->getDefiningRecipe(); - if (!R) - return true; - // TODO: match additional patterns preserving uniformity of booleans, e.g., - // AND/OR/etc. - return match(R, m_Binary(m_VPValue(), m_VPValue())) && - all_of(R->operands(), [](VPValue *Op) { - return vputils::isUniformAfterVectorization(Op); - }); -} - -bool vputils::isHeaderMask(VPValue *V, VPlan &Plan) { - VPValue *Op; - return isa(V) || - match(V, m_ActiveLaneMask(m_VPValue(), m_VPValue())) || - (match(V, m_Binary(m_VPValue(), m_VPValue(Op))) && - Op == Plan.getOrCreateBackedgeTakenCount()); -} diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 4c0972e517263..5bb88e4a57dc3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -42,7 +42,6 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/IR/FMF.h" #include "llvm/IR/Operator.h" -#include "llvm/Support/InstructionCost.h" #include #include #include @@ -65,11 +64,8 @@ class VPlan; class VPReplicateRecipe; class VPlanSlp; class Value; -class LoopVectorizationCostModel; class LoopVersioning; -struct VPCostContext; - namespace Intrinsic { typedef unsigned ID; } @@ -86,14 +82,6 @@ Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE, Loop *CurLoop = nullptr); -/// A helper function that returns the reciprocal of the block probability of -/// predicated blocks. If we return X, we are assuming the predicated block -/// will execute once for every X iterations of the loop header. -/// -/// TODO: We should use actual block probability here, if available. Currently, -/// we always assume predicated blocks have a 50% chance of executing. -inline unsigned getReciprocalPredBlockProb() { return 2; } - /// A range of powers-of-2 vectorization factors with fixed start and /// adjustable end. The range includes start and excludes end, e.g.,: /// [1, 16) = {1, 2, 4, 8} @@ -636,9 +624,6 @@ class VPBlockBase { /// VPBlockBase, thereby "executing" the VPlan. virtual void execute(VPTransformState *State) = 0; - /// Return the cost of the block. - virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx) = 0; - /// Delete all blocks reachable from a given VPBlockBase, inclusive. static void deleteCFG(VPBlockBase *Entry); @@ -722,27 +707,6 @@ class VPLiveOut : public VPUser { #endif }; -/// Struct to hold various analysis needed for cost computations. -struct VPCostContext { - const TargetTransformInfo &TTI; - VPTypeAnalysis Types; - LLVMContext &LLVMCtx; - LoopVectorizationCostModel &CM; - SmallPtrSet SkipCostComputation; - - VPCostContext(const TargetTransformInfo &TTI, Type *CanIVTy, - LLVMContext &LLVMCtx, LoopVectorizationCostModel &CM) - : TTI(TTI), Types(CanIVTy, LLVMCtx), LLVMCtx(LLVMCtx), CM(CM) {} - - /// Return the cost for \p UI with \p VF using the legacy cost model as - /// fallback until computing the cost of all recipes migrates to VPlan. - InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const; - - /// Return true if the cost for \p UI shouldn't be computed, e.g. because it - /// has already been pre-computed. - bool skipCostComputation(Instruction *UI, bool IsVector) const; -}; - /// VPRecipeBase is a base class modeling a sequence of one or more output IR /// instructions. VPRecipeBase owns the VPValues it defines through VPDef /// and is responsible for deleting its defined values. Single-value @@ -782,11 +746,6 @@ class VPRecipeBase : public ilist_node_with_parent, /// this VPRecipe, thereby "executing" the VPlan. virtual void execute(VPTransformState &State) = 0; - /// Return the cost of this recipe, taking into account if the cost - /// computation should be skipped and the ForceTargetInstructionCost flag. - /// Also takes care of printing the cost for debugging. - virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx); - /// Insert an unlinked recipe into a basic block immediately before /// the specified recipe. void insertBefore(VPRecipeBase *InsertPos); @@ -847,11 +806,6 @@ class VPRecipeBase : public ilist_node_with_parent, /// Returns the debug location of the recipe. DebugLoc getDebugLoc() const { return DL; } - -protected: - /// Compute the cost of this recipe using the legacy cost model and the - /// underlying instructions. - InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const; }; // Helper macro to define common classof implementations for recipes. @@ -1427,6 +1381,8 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags { ResultTy(ResultTy) { assert(UI.getOpcode() == Opcode && "opcode of underlying cast doesn't match"); + assert(UI.getType() == ResultTy && + "result type of underlying cast doesn't match"); } VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy) @@ -2140,8 +2096,6 @@ class VPInterleaveRecipe : public VPRecipeBase { "Op must be an operand of the recipe"); return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op); } - - Instruction *getInsertPos() const { return IG->getInsertPos(); } }; /// A recipe to represent inloop reduction operations, performing a reduction on @@ -2956,9 +2910,6 @@ class VPBasicBlock : public VPBlockBase { /// this VPBasicBlock, thereby "executing" the VPlan. void execute(VPTransformState *State) override; - /// Return the cost of this VPBasicBlock. - InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override; - /// Return the position of the first non-phi node recipe in the block. iterator getFirstNonPhi(); @@ -3133,9 +3084,6 @@ class VPRegionBlock : public VPBlockBase { /// this VPRegionBlock, thereby "executing" the VPlan. void execute(VPTransformState *State) override; - // Return the cost of this region. - InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override; - void dropAllReferences(VPValue *NewValue) override; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -3255,9 +3203,6 @@ class VPlan { /// Generate the IR code for this VPlan. void execute(VPTransformState *State); - /// Return the cost of this plan. - InstructionCost cost(ElementCount VF, VPCostContext &Ctx); - VPBasicBlock *getEntry() { return Entry; } const VPBasicBlock *getEntry() const { return Entry; } @@ -3301,11 +3246,6 @@ class VPlan { return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); }); } - iterator_range::iterator> - vectorFactors() const { - return {VFs.begin(), VFs.end()}; - } - bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); } bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); } @@ -3725,13 +3665,6 @@ inline bool isUniformAfterVectorization(VPValue *VPV) { return VPI->isVectorToScalar(); return false; } - -/// Return true if \p Cond is a uniform boolean. -bool isUniformBoolean(VPValue *Cond); - -/// Return true if \p V is a header mask in \p Plan. -bool isHeaderMask(VPValue *V, VPlan &Plan); - } // end namespace vputils } // end namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index b491ea5a18b54..7a482455473e4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -39,7 +39,6 @@ using VectorParts = SmallVector; namespace llvm { extern cl::opt EnableVPlanNativePath; } -extern cl::opt ForceTargetInstructionCost; #define LV_NAME "loop-vectorize" #define DEBUG_TYPE LV_NAME @@ -256,40 +255,6 @@ void VPRecipeBase::moveBefore(VPBasicBlock &BB, insertBefore(BB, I); } -InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) { - if (auto *S = dyn_cast(this)) { - auto *UI = dyn_cast_or_null(S->getUnderlyingValue()); - if (UI && Ctx.skipCostComputation(UI, VF.isVector())) - return 0; - } - - InstructionCost RecipeCost = computeCost(VF, Ctx); - if (ForceTargetInstructionCost.getNumOccurrences() > 0 && - RecipeCost.isValid()) - RecipeCost = InstructionCost(ForceTargetInstructionCost); - - LLVM_DEBUG({ - dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": "; - dump(); - }); - return RecipeCost; -} - -InstructionCost VPRecipeBase::computeCost(ElementCount VF, - VPCostContext &Ctx) const { - // Compute the cost for the recipe falling back to the legacy cost model using - // the underlying instruction. If there is no underlying instruction, returns - // 0. - Instruction *UI = nullptr; - if (auto *S = dyn_cast(this)) - UI = dyn_cast_or_null(S->getUnderlyingValue()); - else if (auto *IG = dyn_cast(this)) - UI = IG->getInsertPos(); - else if (auto *WidenMem = dyn_cast(this)) - UI = &WidenMem->getIngredient(); - return UI ? Ctx.getLegacyCost(UI, VF) : 0; -} - FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const { assert(OpType == OperationType::FPMathOp && "recipe doesn't have fast math flags"); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 082a442bf399d..8ec67eb2f54bd 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -999,10 +999,6 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { : Instruction::ZExt; auto *VPC = new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy); - if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) { - // UnderlyingExt has distinct return type, used to retain legacy cost. - VPC->setUnderlyingValue(UnderlyingExt); - } VPC->insertBefore(&R); Trunc->replaceAllUsesWith(VPC); } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) { @@ -1522,7 +1518,6 @@ void VPlanTransforms::dropPoisonGeneratingRecipes( VPInstruction *New = Builder.createOverflowingOp( Instruction::Add, {A, B}, {false, false}, RecWithFlags->getDebugLoc()); - New->setUnderlyingValue(RecWithFlags->getUnderlyingValue()); RecWithFlags->replaceAllUsesWith(New); RecWithFlags->eraseFromParent(); CurRec = New; diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index fa6a65ff2f3ad..8d945f6f2b8ea 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -74,7 +74,8 @@ class VPValue { public: /// Return the underlying Value attached to this VPValue. - Value *getUnderlyingValue() const { return UnderlyingVal; } + Value *getUnderlyingValue() { return UnderlyingVal; } + const Value *getUnderlyingValue() const { return UnderlyingVal; } /// An enumeration for keeping track of the concrete subclass of VPValue that /// are actually instantiated. diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index 41879f3ebef5a..b5aa96eb23f5e 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -119,7 +119,6 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LV: Interleaving is not beneficial. ; CHECK-NEXT: LV: Found a vectorizable loop (vscale x 4) in ; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop -; CHECK-NEXT: VF picked by VPlan cost model: vscale x 4 ; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1 ; CHECK: LV: Interleaving disabled by the pass manager ; CHECK-NEXT: LV: Vectorizing: innermost loop. @@ -261,7 +260,6 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LV: Interleaving is not beneficial. ; CHECK-NEXT: LV: Found a vectorizable loop (vscale x 4) in ; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop -; CHECK-NEXT: VF picked by VPlan cost model: vscale x 4 ; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1 ; CHECK: LV: Interleaving disabled by the pass manager ; CHECK-NEXT: LV: Vectorizing: innermost loop.