From ad5541045a63fe3049fc910d843bcbb78f7c7056 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 12 Oct 2020 16:13:26 +0100 Subject: [PATCH 001/123] [LoopDeletion] Remove over-eager SCEV verification. 60b852092c98dbdc6248d60109d90ae6f8ad841c introduced SCEV verification to deleteDeadLoop, but it appears this check is currently a bit over-eager and some users of deleteDeadLoop appear to only patch up SE after calling it (e.g. PR47753). Remove the extra check for now. We can consider adding it back after we tracked down the source of the inconsistency for PR47753. --- llvm/lib/Transforms/Utils/LoopUtils.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index ccb9b6d0bdb4c2..d7cd9b19b8d51d 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -724,11 +724,6 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, } LI->destroy(L); } - -#ifndef NDEBUG - if (SE) - SE->verify(); -#endif } /// Checks if \p L has single exit through latch block except possibly From c2216d796aab7659771c05303f9d78bad4aeca07 Mon Sep 17 00:00:00 2001 From: Sebastian Neubauer Date: Mon, 12 Oct 2020 16:38:14 +0200 Subject: [PATCH 002/123] [AMDGPU] Print metadata on error If the metadata is valid yaml, we can print it, even if it failed validation. That makes it easier to debug any wrong metadata. Differential Revision: https://reviews.llvm.org/D89243 --- llvm/tools/llvm-readobj/ELFDumper.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index e7ee0793b903ab..fcc5c002c62480 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -5108,10 +5108,10 @@ static AMDGPUNote getAMDGPUNote(uint32_t NoteType, ArrayRef Desc) { return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"}; AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true); + std::string HSAMetadataString; if (!Verifier.verify(MsgPackDoc.getRoot())) - return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"}; + HSAMetadataString = "Invalid AMDGPU Metadata\n"; - std::string HSAMetadataString; raw_string_ostream StrOS(HSAMetadataString); MsgPackDoc.toYAML(StrOS); From 596a9f6b89d0d3e3f2897132ef1283941bd3607b Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Fri, 9 Oct 2020 16:38:42 -0700 Subject: [PATCH 003/123] [NFC][Regalloc] Pass VirtRegMap by reference. It's never null - the reason it's modeled as a pointer is because the pass can't init it in its ctor. Passing by ref simplifies the code, too, as the null checks were unnecessary complexity. 
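To make the simplification concrete, here is a minimal standalone sketch
(illustrative only -- `MiniVRM`, `AuxInfoPtr` and `AuxInfoRef` are invented
stand-ins for this note, not the LLVM classes) of how switching a member from
pointer to reference encodes the never-null invariant in the type and lets
the defensive checks be deleted:

  #include <cassert>

  // Stand-in for VirtRegMap, invented for this sketch.
  struct MiniVRM {
    unsigned getOriginal(unsigned Reg) const { return Reg; }
  };

  // Before: a pointer member invites a null check at every use site,
  // even though callers always pass a valid object.
  class AuxInfoPtr {
    const MiniVRM *const VRM;
  public:
    explicit AuxInfoPtr(const MiniVRM *V) : VRM(V) {}
    unsigned original(unsigned Reg) const {
      return VRM ? VRM->getOriginal(Reg) : 0; // dead branch in practice
    }
  };

  // After: a reference member states the invariant once, in the type.
  class AuxInfoRef {
    const MiniVRM &VRM;
  public:
    explicit AuxInfoRef(const MiniVRM &V) : VRM(V) {}
    unsigned original(unsigned Reg) const {
      return VRM.getOriginal(Reg); // no null check needed, none possible
    }
  };

  int main() {
    MiniVRM M;
    assert(AuxInfoPtr(&M).original(42) == AuxInfoRef(M).original(42));
    return 0;
  }

The same pattern applies to VirtRegAuxInfo below: callers that previously
passed `VRM` now pass `*VRM`, asserting the invariant once at construction
time instead of re-checking it on every query.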
Differential Revision: https://reviews.llvm.org/D89171 --- llvm/include/llvm/CodeGen/CalcSpillWeights.h | 6 +- llvm/lib/CodeGen/CalcSpillWeights.cpp | 58 +++++++++----------- llvm/lib/CodeGen/LiveRangeEdit.cpp | 2 +- llvm/lib/CodeGen/RegAllocBasic.cpp | 2 +- llvm/lib/CodeGen/RegAllocGreedy.cpp | 2 +- llvm/lib/CodeGen/RegAllocPBQP.cpp | 4 +- 6 files changed, 35 insertions(+), 39 deletions(-) diff --git a/llvm/include/llvm/CodeGen/CalcSpillWeights.h b/llvm/include/llvm/CodeGen/CalcSpillWeights.h index c345c42c777451..78dae81f596e1a 100644 --- a/llvm/include/llvm/CodeGen/CalcSpillWeights.h +++ b/llvm/include/llvm/CodeGen/CalcSpillWeights.h @@ -46,13 +46,13 @@ class VirtRegMap; class VirtRegAuxInfo { MachineFunction &MF; LiveIntervals &LIS; - VirtRegMap *const VRM; + const VirtRegMap &VRM; const MachineLoopInfo &Loops; const MachineBlockFrequencyInfo &MBFI; public: - VirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS, VirtRegMap *VRM, - const MachineLoopInfo &Loops, + VirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS, + const VirtRegMap &VRM, const MachineLoopInfo &Loops, const MachineBlockFrequencyInfo &MBFI) : MF(MF), LIS(LIS), VRM(VRM), Loops(Loops), MBFI(MBFI) {} diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp index 03490643339f23..0a268a20d365f9 100644 --- a/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -76,12 +76,11 @@ static Register copyHint(const MachineInstr *MI, unsigned Reg, } // Check if all values in LI are rematerializable -static bool isRematerializable(const LiveInterval &LI, - const LiveIntervals &LIS, - VirtRegMap *VRM, +static bool isRematerializable(const LiveInterval &LI, const LiveIntervals &LIS, + const VirtRegMap &VRM, const TargetInstrInfo &TII) { unsigned Reg = LI.reg(); - unsigned Original = VRM ? VRM->getOriginal(Reg) : 0; + unsigned Original = VRM.getOriginal(Reg); for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); I != E; ++I) { const VNInfo *VNI = *I; @@ -96,31 +95,28 @@ static bool isRematerializable(const LiveInterval &LI, // Trace copies introduced by live range splitting. The inline // spiller can rematerialize through these copies, so the spill // weight must reflect this. - if (VRM) { - while (MI->isFullCopy()) { - // The copy destination must match the interval register. - if (MI->getOperand(0).getReg() != Reg) - return false; - - // Get the source register. - Reg = MI->getOperand(1).getReg(); - - // If the original (pre-splitting) registers match this - // copy came from a split. - if (!Register::isVirtualRegister(Reg) || - VRM->getOriginal(Reg) != Original) - return false; - - // Follow the copy live-in value. - const LiveInterval &SrcLI = LIS.getInterval(Reg); - LiveQueryResult SrcQ = SrcLI.Query(VNI->def); - VNI = SrcQ.valueIn(); - assert(VNI && "Copy from non-existing value"); - if (VNI->isPHIDef()) - return false; - MI = LIS.getInstructionFromIndex(VNI->def); - assert(MI && "Dead valno in interval"); - } + while (MI->isFullCopy()) { + // The copy destination must match the interval register. + if (MI->getOperand(0).getReg() != Reg) + return false; + + // Get the source register. + Reg = MI->getOperand(1).getReg(); + + // If the original (pre-splitting) registers match this + // copy came from a split. + if (!Register::isVirtualRegister(Reg) || VRM.getOriginal(Reg) != Original) + return false; + + // Follow the copy live-in value. 
+ const LiveInterval &SrcLI = LIS.getInterval(Reg); + LiveQueryResult SrcQ = SrcLI.Query(VNI->def); + VNI = SrcQ.valueIn(); + assert(VNI && "Copy from non-existing value"); + if (VNI->isPHIDef()) + return false; + MI = LIS.getInstructionFromIndex(VNI->def); + assert(MI && "Dead valno in interval"); } if (!TII.isTriviallyReMaterializable(*MI, LIS.getAliasAnalysis())) @@ -155,9 +151,9 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, std::pair TargetHint = MRI.getRegAllocationHint(LI.reg()); - if (LI.isSpillable() && VRM) { + if (LI.isSpillable()) { Register Reg = LI.reg(); - Register Original = VRM->getOriginal(Reg); + Register Original = VRM.getOriginal(Reg); const LiveInterval &OrigInt = LIS.getInterval(Original); // li comes from a split of OrigInt. If OrigInt was marked // as not spillable, make sure the new interval is marked diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp index dc62c1377ce2c4..b77c4c414e8a2f 100644 --- a/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -463,7 +463,7 @@ void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, const MachineLoopInfo &Loops, const MachineBlockFrequencyInfo &MBFI) { - VirtRegAuxInfo VRAI(MF, LIS, VRM, Loops, MBFI); + VirtRegAuxInfo VRAI(MF, LIS, *VRM, Loops, MBFI); for (unsigned I = 0, Size = size(); I < Size; ++I) { LiveInterval &LI = LIS.getInterval(get(I)); if (MRI.recomputeRegClass(LI.reg())) diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp index 83b5a05f92e9be..05c8c4e34c692e 100644 --- a/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -312,7 +312,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { RegAllocBase::init(getAnalysis(), getAnalysis(), getAnalysis()); - VirtRegAuxInfo VRAI(*MF, *LIS, VRM, getAnalysis(), + VirtRegAuxInfo VRAI(*MF, *LIS, *VRM, getAnalysis(), getAnalysis()); VRAI.calculateSpillWeightsAndHints(); diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 03dab75b33e056..cdc1422797fea0 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -3234,7 +3234,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { initializeCSRCost(); - VRAI = std::make_unique(*MF, *LIS, VRM, *Loops, *MBFI); + VRAI = std::make_unique(*MF, *LIS, *VRM, *Loops, *MBFI); VRAI->calculateSpillWeightsAndHints(); diff --git a/llvm/lib/CodeGen/RegAllocPBQP.cpp b/llvm/lib/CodeGen/RegAllocPBQP.cpp index 2c6e01376b0c2c..1d9923c38ce26f 100644 --- a/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -527,7 +527,7 @@ class PBQPVirtRegAuxInfo final : public VirtRegAuxInfo { } public: - PBQPVirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS, VirtRegMap *VRM, + PBQPVirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM, const MachineLoopInfo &Loops, const MachineBlockFrequencyInfo &MBFI) : VirtRegAuxInfo(MF, LIS, VRM, Loops, MBFI) {} @@ -799,7 +799,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { VirtRegMap &VRM = getAnalysis(); - PBQPVirtRegAuxInfo VRAI(MF, LIS, &VRM, getAnalysis(), MBFI); + PBQPVirtRegAuxInfo VRAI(MF, LIS, VRM, getAnalysis(), MBFI); VRAI.calculateSpillWeightsAndHints(); std::unique_ptr VRegSpiller(createInlineSpiller(*this, MF, VRM)); From ea058d289cbf54e5b33aac7f7a13d0d58625f1b9 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 2 Oct 2020 19:02:22 +0100 Subject: [PATCH 004/123] [VPlan] Use operands for 
printing of VPWidenMemoryInstructionRecipe. Now that operands of the recipe are managed through VPUser, we can simplify the printing by just using the operands. --- llvm/lib/Transforms/Vectorize/VPlan.cpp | 17 +++++++++-------- llvm/lib/Transforms/Vectorize/VPlan.h | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index a009393d029cc4..054920645a9af0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -790,7 +790,7 @@ void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) { dumpEdges(Region); } -void VPlanPrinter::printAsIngredient(raw_ostream &O, Value *V) { +void VPlanPrinter::printAsIngredient(raw_ostream &O, const Value *V) { std::string IngredientString; raw_string_ostream RSO(IngredientString); if (auto *Inst = dyn_cast(V)) { @@ -903,13 +903,14 @@ void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent, void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { - O << "\"WIDEN " << VPlanIngredient(&Instr); - O << ", "; - getAddr()->printAsOperand(O, SlotTracker); - VPValue *Mask = getMask(); - if (Mask) { - O << ", "; - Mask->printAsOperand(O, SlotTracker); + O << "\"WIDEN " << Instruction::getOpcodeName(Instr.getOpcode()) << " "; + + bool First = true; + for (VPValue *Op : operands()) { + if (!First) + O << ", "; + Op->printAsOperand(O, SlotTracker); + First = false; } } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index aa59904ea78e46..30f984fd39d768 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1778,7 +1778,7 @@ class VPlanPrinter { void dump(); - static void printAsIngredient(raw_ostream &O, Value *V); + static void printAsIngredient(raw_ostream &O, const Value *V); }; struct VPlanIngredient { From 43d347995c33a5f48f0b4d9cf3d541a1f6ba66c6 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Fri, 9 Oct 2020 10:04:29 -0700 Subject: [PATCH 005/123] [NFC][MC] Use MCRegister in LiveRangeMatrix The change starts from LiveRangeMatrix and also checks the users of the APIs are typed accordingly. Differential Revision: https://reviews.llvm.org/D89145 --- llvm/include/llvm/CodeGen/LiveRegMatrix.h | 15 +++---- llvm/lib/CodeGen/LiveRegMatrix.cpp | 22 +++++----- llvm/lib/CodeGen/RegAllocBase.cpp | 2 +- llvm/lib/CodeGen/RegAllocBase.h | 4 +- llvm/lib/CodeGen/RegAllocBasic.cpp | 15 +++---- llvm/lib/CodeGen/RegAllocGreedy.cpp | 41 ++++++++++--------- llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp | 6 +-- llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp | 17 ++++---- .../Target/AMDGPU/SIPreAllocateWWMRegs.cpp | 2 +- 9 files changed, 64 insertions(+), 60 deletions(-) diff --git a/llvm/include/llvm/CodeGen/LiveRegMatrix.h b/llvm/include/llvm/CodeGen/LiveRegMatrix.h index ab4d44f9a61176..a3f8f88e810b99 100644 --- a/llvm/include/llvm/CodeGen/LiveRegMatrix.h +++ b/llvm/include/llvm/CodeGen/LiveRegMatrix.h @@ -104,19 +104,19 @@ class LiveRegMatrix : public MachineFunctionPass { /// If this function returns IK_Free, it is legal to assign(VirtReg, PhysReg). /// When there is more than one kind of interference, the InterferenceKind /// with the highest enum value is returned. 
- InterferenceKind checkInterference(LiveInterval &VirtReg, unsigned PhysReg); + InterferenceKind checkInterference(LiveInterval &VirtReg, MCRegister PhysReg); /// Check for interference in the segment [Start, End) that may prevent /// assignment to PhysReg. If this function returns true, there is /// interference in the segment [Start, End) of some other interval already /// assigned to PhysReg. If this function returns false, PhysReg is free at /// the segment [Start, End). - bool checkInterference(SlotIndex Start, SlotIndex End, unsigned PhysReg); + bool checkInterference(SlotIndex Start, SlotIndex End, MCRegister PhysReg); /// Assign VirtReg to PhysReg. /// This will mark VirtReg's live range as occupied in the LiveRegMatrix and /// update VirtRegMap. The live range is expected to be available in PhysReg. - void assign(LiveInterval &VirtReg, unsigned PhysReg); + void assign(LiveInterval &VirtReg, MCRegister PhysReg); /// Unassign VirtReg from its PhysReg. /// Assuming that VirtReg was previously assigned to a PhysReg, this undoes @@ -124,7 +124,7 @@ class LiveRegMatrix : public MachineFunctionPass { void unassign(LiveInterval &VirtReg); /// Returns true if the given \p PhysReg has any live intervals assigned. - bool isPhysRegUsed(unsigned PhysReg) const; + bool isPhysRegUsed(MCRegister PhysReg) const; //===--------------------------------------------------------------------===// // Low-level interface. @@ -136,18 +136,19 @@ class LiveRegMatrix : public MachineFunctionPass { /// Check for regmask interference only. /// Return true if VirtReg crosses a regmask operand that clobbers PhysReg. /// If PhysReg is null, check if VirtReg crosses any regmask operands. - bool checkRegMaskInterference(LiveInterval &VirtReg, unsigned PhysReg = 0); + bool checkRegMaskInterference(LiveInterval &VirtReg, + MCRegister PhysReg = MCRegister::NoRegister); /// Check for regunit interference only. /// Return true if VirtReg overlaps a fixed assignment of one of PhysRegs's /// register units. - bool checkRegUnitInterference(LiveInterval &VirtReg, unsigned PhysReg); + bool checkRegUnitInterference(LiveInterval &VirtReg, MCRegister PhysReg); /// Query a line of the assigned virtual register matrix directly. /// Use MCRegUnitIterator to enumerate all regunits in the desired PhysReg. /// This returns a reference to an internal Query data structure that is only /// valid until the next query() call. - LiveIntervalUnion::Query &query(const LiveRange &LR, unsigned RegUnit); + LiveIntervalUnion::Query &query(const LiveRange &LR, MCRegister RegUnit); /// Directly access the live interval unions per regunit. /// This returns an array indexed by the regunit number. 
diff --git a/llvm/lib/CodeGen/LiveRegMatrix.cpp b/llvm/lib/CodeGen/LiveRegMatrix.cpp index 6b1775f28c045e..59c7f93fd915c2 100644 --- a/llvm/lib/CodeGen/LiveRegMatrix.cpp +++ b/llvm/lib/CodeGen/LiveRegMatrix.cpp @@ -78,7 +78,7 @@ void LiveRegMatrix::releaseMemory() { template static bool foreachUnit(const TargetRegisterInfo *TRI, - LiveInterval &VRegInterval, unsigned PhysReg, + LiveInterval &VRegInterval, MCRegister PhysReg, Callable Func) { if (VRegInterval.hasSubRanges()) { for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { @@ -101,7 +101,7 @@ static bool foreachUnit(const TargetRegisterInfo *TRI, return false; } -void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) { +void LiveRegMatrix::assign(LiveInterval &VirtReg, MCRegister PhysReg) { LLVM_DEBUG(dbgs() << "assigning " << printReg(VirtReg.reg(), TRI) << " to " << printReg(PhysReg, TRI) << ':'); assert(!VRM->hasPhys(VirtReg.reg()) && "Duplicate VirtReg assignment"); @@ -135,7 +135,7 @@ void LiveRegMatrix::unassign(LiveInterval &VirtReg) { LLVM_DEBUG(dbgs() << '\n'); } -bool LiveRegMatrix::isPhysRegUsed(unsigned PhysReg) const { +bool LiveRegMatrix::isPhysRegUsed(MCRegister PhysReg) const { for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) { if (!Matrix[*Unit].empty()) return true; @@ -144,7 +144,7 @@ bool LiveRegMatrix::isPhysRegUsed(unsigned PhysReg) const { } bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg, - unsigned PhysReg) { + MCRegister PhysReg) { // Check if the cached information is valid. // The same BitVector can be reused for all PhysRegs. // We could cache multiple VirtRegs if it becomes necessary. @@ -162,7 +162,7 @@ bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg, } bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg, - unsigned PhysReg) { + MCRegister PhysReg) { if (VirtReg.empty()) return false; CoalescerPair CP(VirtReg.reg(), PhysReg, *TRI); @@ -176,14 +176,14 @@ bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg, } LiveIntervalUnion::Query &LiveRegMatrix::query(const LiveRange &LR, - unsigned RegUnit) { + MCRegister RegUnit) { LiveIntervalUnion::Query &Q = Queries[RegUnit]; Q.init(UserTag, LR, Matrix[RegUnit]); return Q; } LiveRegMatrix::InterferenceKind -LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) { +LiveRegMatrix::checkInterference(LiveInterval &VirtReg, MCRegister PhysReg) { if (VirtReg.empty()) return IK_Free; @@ -197,9 +197,9 @@ LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) { // Check the matrix for virtual register interference. bool Interference = foreachUnit(TRI, VirtReg, PhysReg, - [&](unsigned Unit, const LiveRange &LR) { - return query(LR, Unit).checkInterference(); - }); + [&](MCRegister Unit, const LiveRange &LR) { + return query(LR, Unit).checkInterference(); + }); if (Interference) return IK_VirtReg; @@ -207,7 +207,7 @@ LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) { } bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End, - unsigned PhysReg) { + MCRegister PhysReg) { // Construct artificial live range containing only one segment [Start, End). 
VNInfo valno(0, Start); LiveRange::Segment Seg(Start, End, &valno); diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp index f7fe1063afeaea..d49a64b3f141bb 100644 --- a/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/llvm/lib/CodeGen/RegAllocBase.cpp @@ -110,7 +110,7 @@ void RegAllocBase::allocatePhysRegs() { using VirtRegVec = SmallVector; VirtRegVec SplitVRegs; - unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); + MCRegister AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); if (AvailablePhysReg == ~0u) { // selectOrSplit failed to find a register! diff --git a/llvm/lib/CodeGen/RegAllocBase.h b/llvm/lib/CodeGen/RegAllocBase.h index 8e931eaae99a0e..3144605345e998 100644 --- a/llvm/lib/CodeGen/RegAllocBase.h +++ b/llvm/lib/CodeGen/RegAllocBase.h @@ -101,8 +101,8 @@ class RegAllocBase { // Each call must guarantee forward progess by returning an available PhysReg // or new set of split live virtual registers. It is up to the splitter to // converge quickly toward fully spilled live ranges. - virtual Register selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl &splitLVRs) = 0; + virtual MCRegister selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl &splitLVRs) = 0; // Use this group name for NamedRegionTimer. static const char TimerGroupName[]; diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp index 05c8c4e34c692e..58e1e364d285da 100644 --- a/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -100,8 +100,8 @@ class RABasic : public MachineFunctionPass, return LI; } - Register selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl &SplitVRegs) override; + MCRegister selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl &SplitVRegs) override; /// Perform register allocation. bool runOnMachineFunction(MachineFunction &mf) override; @@ -253,10 +253,10 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, Register PhysReg, // |vregs| * |machineregs|. And since the number of interference tests is // minimal, there is no value in caching them outside the scope of // selectOrSplit(). -Register RABasic::selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl &SplitVRegs) { +MCRegister RABasic::selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl &SplitVRegs) { // Populate a list of physical register spill candidates. - SmallVector PhysRegSpillCands; + SmallVector PhysRegSpillCands; // Check for an available register in this class. auto Order = @@ -281,8 +281,9 @@ Register RABasic::selectOrSplit(LiveInterval &VirtReg, } // Try to spill another interfering reg with less spill weight. 
- for (SmallVectorImpl::iterator PhysRegI = PhysRegSpillCands.begin(), - PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) { + for (auto PhysRegI = PhysRegSpillCands.begin(), + PhysRegE = PhysRegSpillCands.end(); + PhysRegI != PhysRegE; ++PhysRegI) { if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs)) continue; diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index cdc1422797fea0..6a804d96c04bf5 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -418,7 +418,8 @@ class RAGreedy : public MachineFunctionPass, Spiller &spiller() override { return *SpillerInstance; } void enqueue(LiveInterval *LI) override; LiveInterval *dequeue() override; - Register selectOrSplit(LiveInterval&, SmallVectorImpl&) override; + MCRegister selectOrSplit(LiveInterval &, + SmallVectorImpl &) override; void aboutToRemoveInterval(LiveInterval &) override; /// Perform register allocation. @@ -432,8 +433,8 @@ class RAGreedy : public MachineFunctionPass, static char ID; private: - Register selectOrSplitImpl(LiveInterval &, SmallVectorImpl &, - SmallVirtRegSet &, unsigned = 0); + MCRegister selectOrSplitImpl(LiveInterval &, SmallVectorImpl &, + SmallVirtRegSet &, unsigned = 0); bool LRE_CanEraseVirtReg(unsigned) override; void LRE_WillShrinkVirtReg(unsigned) override; @@ -459,8 +460,8 @@ class RAGreedy : public MachineFunctionPass, void calcGapWeights(unsigned, SmallVectorImpl&); Register canReassign(LiveInterval &VirtReg, Register PrevReg); bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); - bool canEvictInterference(LiveInterval&, Register, bool, EvictionCost&, - const SmallVirtRegSet&); + bool canEvictInterference(LiveInterval &, MCRegister, bool, EvictionCost &, + const SmallVirtRegSet &); bool canEvictInterferenceInRange(LiveInterval &VirtReg, Register oPhysReg, SlotIndex Start, SlotIndex End, EvictionCost &MaxCost); @@ -869,7 +870,7 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, /// @param MaxCost Only look for cheaper candidates and update with new cost /// when returning true. /// @returns True when interference can be evicted cheaper than MaxCost. -bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, Register PhysReg, +bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) { // It is only possible to evict virtual register interference. @@ -2606,7 +2607,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, SmallLISet RecoloringCandidates; // Record the original mapping virtual register to physical register in case // the recoloring fails. - DenseMap VirtRegToPhysReg; + DenseMap VirtRegToPhysReg; // Mark VirtReg as fixed, i.e., it will not be recolored pass this point in // this recoloring "session". 
assert(!FixedRegisters.count(VirtReg.reg())); @@ -2701,7 +2702,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, Register ItVirtReg = (*It)->reg(); if (VRM->hasPhys(ItVirtReg)) Matrix->unassign(**It); - Register ItPhysReg = VirtRegToPhysReg[ItVirtReg]; + MCRegister ItPhysReg = VirtRegToPhysReg[ItVirtReg]; Matrix->assign(**It, ItPhysReg); } } @@ -2725,8 +2726,8 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, while (!RecoloringQueue.empty()) { LiveInterval *LI = dequeue(RecoloringQueue); LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n'); - Register PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, - Depth + 1); + MCRegister PhysReg = + selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1); // When splitting happens, the live-range may actually be empty. // In that case, this is okay to continue the recoloring even // if we did not find an alternative color for it. Indeed, @@ -2753,12 +2754,12 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, // Main Entry Point //===----------------------------------------------------------------------===// -Register RAGreedy::selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl &NewVRegs) { +MCRegister RAGreedy::selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl &NewVRegs) { CutOffInfo = CO_None; LLVMContext &Ctx = MF->getFunction().getContext(); SmallVirtRegSet FixedRegisters; - Register Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters); + MCRegister Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters); if (Reg == ~0U && (CutOffInfo != CO_None)) { uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf); if (CutOffEncountered == CO_Depth) @@ -2902,7 +2903,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { SmallVector RecoloringCandidates; HintsInfo Info; unsigned Reg = VirtReg.reg(); - Register PhysReg = VRM->getPhys(Reg); + MCRegister PhysReg = VRM->getPhys(Reg); // Start the recoloring algorithm from the input live-interval, then // it will propagate to the ones that are copy-related with it. Visited.insert(Reg); @@ -3014,10 +3015,10 @@ void RAGreedy::tryHintsRecoloring() { } } -Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, - SmallVectorImpl &NewVRegs, - SmallVirtRegSet &FixedRegisters, - unsigned Depth) { +MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, + SmallVectorImpl &NewVRegs, + SmallVirtRegSet &FixedRegisters, + unsigned Depth) { unsigned CostPerUseLimit = ~0u; // First try assigning a free register. auto Order = @@ -3030,8 +3031,8 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // register. if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) { - Register CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg, - CostPerUseLimit, NewVRegs); + MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg, + CostPerUseLimit, NewVRegs); if (CSRReg || !NewVRegs.empty()) // Return now if we decide to use a CSR or create new vregs due to // pre-splitting. 
diff --git a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp index 1df86e7ca6b20b..200b2d36848da8 100644 --- a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp +++ b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp @@ -118,11 +118,11 @@ GCNNSAReassign::tryAssignRegisters(SmallVectorImpl &Intervals, LRM->unassign(*Intervals[N]); for (unsigned N = 0; N < NumRegs; ++N) - if (LRM->checkInterference(*Intervals[N], StartReg + N)) + if (LRM->checkInterference(*Intervals[N], MCRegister::from(StartReg + N))) return false; for (unsigned N = 0; N < NumRegs; ++N) - LRM->assign(*Intervals[N], StartReg + N); + LRM->assign(*Intervals[N], MCRegister::from(StartReg + N)); return true; } @@ -273,7 +273,7 @@ bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) { AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr0); SmallVector Intervals; - SmallVector OrigRegs; + SmallVector OrigRegs; SlotIndex MinInd, MaxInd; for (unsigned I = 0; I < Info->VAddrDwords; ++I) { const MachineOperand &Op = MI->getOperand(VAddr0Idx + I); diff --git a/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp b/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp index 92d4a646247933..9a27b23ce419a7 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp @@ -239,7 +239,8 @@ class GCNRegBankReassign : public MachineFunctionPass { // Search for a register in Bank unused within LI. // Returns phys reg or NoRegister. - unsigned scavengeReg(LiveInterval &LI, unsigned Bank, unsigned SubReg) const; + MCRegister scavengeReg(LiveInterval &LI, unsigned Bank, + unsigned SubReg) const; // Try to reassign candidate. Returns number or stall cycles saved. unsigned tryReassign(Candidate &C); @@ -648,15 +649,15 @@ unsigned GCNRegBankReassign::computeStallCycles(Register SrcReg, Register Reg, return TotalStallCycles; } -unsigned GCNRegBankReassign::scavengeReg(LiveInterval &LI, unsigned Bank, - unsigned SubReg) const { +MCRegister GCNRegBankReassign::scavengeReg(LiveInterval &LI, unsigned Bank, + unsigned SubReg) const { const TargetRegisterClass *RC = MRI->getRegClass(LI.reg()); unsigned MaxNumRegs = (Bank < NUM_VGPR_BANKS) ? MaxNumVGPRs : MaxNumSGPRs; unsigned MaxReg = MaxNumRegs + (Bank < NUM_VGPR_BANKS ? AMDGPU::VGPR0 : AMDGPU::SGPR0); - for (Register Reg : RC->getRegisters()) { + for (MCRegister Reg : RC->getRegisters()) { // Check occupancy limit. 
if (TRI->isSubRegisterEq(Reg, MaxReg)) break; @@ -667,7 +668,7 @@ unsigned GCNRegBankReassign::scavengeReg(LiveInterval &LI, unsigned Bank, for (unsigned I = 0; CSRegs[I]; ++I) if (TRI->isSubRegisterEq(Reg, CSRegs[I]) && !LRM->isPhysRegUsed(CSRegs[I])) - return AMDGPU::NoRegister; + return MCRegister::from(AMDGPU::NoRegister); LLVM_DEBUG(dbgs() << "Trying register " << printReg(Reg) << '\n'); @@ -675,7 +676,7 @@ unsigned GCNRegBankReassign::scavengeReg(LiveInterval &LI, unsigned Bank, return Reg; } - return AMDGPU::NoRegister; + return MCRegister::from(AMDGPU::NoRegister); } unsigned GCNRegBankReassign::tryReassign(Candidate &C) { @@ -720,11 +721,11 @@ unsigned GCNRegBankReassign::tryReassign(Candidate &C) { } llvm::sort(BankStalls); - Register OrigReg = VRM->getPhys(C.Reg); + MCRegister OrigReg = VRM->getPhys(C.Reg); LRM->unassign(LI); while (!BankStalls.empty()) { BankStall BS = BankStalls.pop_back_val(); - Register Reg = scavengeReg(LI, BS.Bank, C.SubReg); + MCRegister Reg = scavengeReg(LI, BS.Bank, C.SubReg); if (Reg == AMDGPU::NoRegister) { LLVM_DEBUG(dbgs() << "No free registers in bank " << printBank(BS.Bank) << '\n'); diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp index b6e5235c310a65..21348f89c19d95 100644 --- a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp +++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp @@ -104,7 +104,7 @@ bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) { LiveInterval &LI = LIS->getInterval(Reg); - for (unsigned PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) { + for (MCRegister PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) { if (!MRI->isPhysRegUsed(PhysReg) && Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) { Matrix->assign(LI, PhysReg); From 2f66bfac280f9ae9299dccc357ae10e8a48525ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= Date: Mon, 12 Oct 2020 17:54:16 +0200 Subject: [PATCH 006/123] [Tests] Regenerate test checks; NFC --- .../test/Transforms/InstCombine/cabs-array.ll | 70 +++++++++++-------- .../Transforms/InstCombine/cabs-discrete.ll | 58 ++++++++------- .../Transforms/InstCombine/fabs-libcall.ll | 9 +-- llvm/test/Transforms/InstCombine/objsize.ll | 20 +++--- 4 files changed, 89 insertions(+), 68 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/cabs-array.ll b/llvm/test/Transforms/InstCombine/cabs-array.ll index 1c15dc1c5457fb..198badf5ac72a3 100644 --- a/llvm/test/Transforms/InstCombine/cabs-array.ll +++ b/llvm/test/Transforms/InstCombine/cabs-array.ll @@ -1,61 +1,71 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s define double @std_cabs([2 x double] %z) { -; CHECK-LABEL: define double @std_cabs( -; CHECK: tail call double @cabs( +; CHECK-LABEL: @std_cabs( +; CHECK-NEXT: [[CALL:%.*]] = tail call double @cabs([2 x double] [[Z:%.*]]) +; CHECK-NEXT: ret double [[CALL]] +; %call = tail call double @cabs([2 x double] %z) ret double %call } define float @std_cabsf([2 x float] %z) { -; CHECK-LABEL: define float @std_cabsf( -; CHECK: tail call float @cabsf( +; CHECK-LABEL: @std_cabsf( +; CHECK-NEXT: [[CALL:%.*]] = tail call float @cabsf([2 x float] [[Z:%.*]]) +; CHECK-NEXT: ret float [[CALL]] +; %call = tail call float @cabsf([2 x float] %z) ret float %call } define fp128 @std_cabsl([2 x fp128] %z) { -; CHECK-LABEL: define fp128 @std_cabsl( -; CHECK: tail call fp128 @cabsl( +; CHECK-LABEL: @std_cabsl( +; CHECK-NEXT: [[CALL:%.*]] = tail 
call fp128 @cabsl([2 x fp128] [[Z:%.*]]) +; CHECK-NEXT: ret fp128 [[CALL]] +; %call = tail call fp128 @cabsl([2 x fp128] %z) ret fp128 %call } define double @fast_cabs([2 x double] %z) { -; CHECK-LABEL: define double @fast_cabs( -; CHECK: %real = extractvalue [2 x double] %z, 0 -; CHECK: %imag = extractvalue [2 x double] %z, 1 -; CHECK: %1 = fmul fast double %real, %real -; CHECK: %2 = fmul fast double %imag, %imag -; CHECK: %3 = fadd fast double %1, %2 -; CHECK: %cabs = call fast double @llvm.sqrt.f64(double %3) -; CHECK: ret double %cabs +; CHECK-LABEL: @fast_cabs( +; CHECK-NEXT: [[REAL:%.*]] = extractvalue [2 x double] [[Z:%.*]], 0 +; CHECK-NEXT: [[IMAG:%.*]] = extractvalue [2 x double] [[Z]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[REAL]], [[REAL]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[IMAG]], [[IMAG]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd fast double [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[CABS:%.*]] = call fast double @llvm.sqrt.f64(double [[TMP3]]) +; CHECK-NEXT: ret double [[CABS]] +; %call = tail call fast double @cabs([2 x double] %z) ret double %call } define float @fast_cabsf([2 x float] %z) { -; CHECK-LABEL: define float @fast_cabsf( -; CHECK: %real = extractvalue [2 x float] %z, 0 -; CHECK: %imag = extractvalue [2 x float] %z, 1 -; CHECK: %1 = fmul fast float %real, %real -; CHECK: %2 = fmul fast float %imag, %imag -; CHECK: %3 = fadd fast float %1, %2 -; CHECK: %cabs = call fast float @llvm.sqrt.f32(float %3) -; CHECK: ret float %cabs +; CHECK-LABEL: @fast_cabsf( +; CHECK-NEXT: [[REAL:%.*]] = extractvalue [2 x float] [[Z:%.*]], 0 +; CHECK-NEXT: [[IMAG:%.*]] = extractvalue [2 x float] [[Z]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[REAL]], [[REAL]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[IMAG]], [[IMAG]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd fast float [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[CABS:%.*]] = call fast float @llvm.sqrt.f32(float [[TMP3]]) +; CHECK-NEXT: ret float [[CABS]] +; %call = tail call fast float @cabsf([2 x float] %z) ret float %call } define fp128 @fast_cabsl([2 x fp128] %z) { -; CHECK-LABEL: define fp128 @fast_cabsl( -; CHECK: %real = extractvalue [2 x fp128] %z, 0 -; CHECK: %imag = extractvalue [2 x fp128] %z, 1 -; CHECK: %1 = fmul fast fp128 %real, %real -; CHECK: %2 = fmul fast fp128 %imag, %imag -; CHECK: %3 = fadd fast fp128 %1, %2 -; CHECK: %cabs = call fast fp128 @llvm.sqrt.f128(fp128 %3) -; CHECK: ret fp128 %cabs +; CHECK-LABEL: @fast_cabsl( +; CHECK-NEXT: [[REAL:%.*]] = extractvalue [2 x fp128] [[Z:%.*]], 0 +; CHECK-NEXT: [[IMAG:%.*]] = extractvalue [2 x fp128] [[Z]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast fp128 [[REAL]], [[REAL]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast fp128 [[IMAG]], [[IMAG]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd fast fp128 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[CABS:%.*]] = call fast fp128 @llvm.sqrt.f128(fp128 [[TMP3]]) +; CHECK-NEXT: ret fp128 [[CABS]] +; %call = tail call fast fp128 @cabsl([2 x fp128] %z) ret fp128 %call } diff --git a/llvm/test/Transforms/InstCombine/cabs-discrete.ll b/llvm/test/Transforms/InstCombine/cabs-discrete.ll index 405c073c194f00..70e456430b40ee 100644 --- a/llvm/test/Transforms/InstCombine/cabs-discrete.ll +++ b/llvm/test/Transforms/InstCombine/cabs-discrete.ll @@ -1,55 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s define double @std_cabs(double %real, double %imag) { -; CHECK-LABEL: define double @std_cabs( -; CHECK: tail call double @cabs( +; CHECK-LABEL: @std_cabs( +; CHECK-NEXT: 
[[CALL:%.*]] = tail call double @cabs(double [[REAL:%.*]], double [[IMAG:%.*]]) +; CHECK-NEXT: ret double [[CALL]] +; %call = tail call double @cabs(double %real, double %imag) ret double %call } define float @std_cabsf(float %real, float %imag) { -; CHECK-LABEL: define float @std_cabsf( -; CHECK: tail call float @cabsf( +; CHECK-LABEL: @std_cabsf( +; CHECK-NEXT: [[CALL:%.*]] = tail call float @cabsf(float [[REAL:%.*]], float [[IMAG:%.*]]) +; CHECK-NEXT: ret float [[CALL]] +; %call = tail call float @cabsf(float %real, float %imag) ret float %call } define fp128 @std_cabsl(fp128 %real, fp128 %imag) { -; CHECK-LABEL: define fp128 @std_cabsl( -; CHECK: tail call fp128 @cabsl( +; CHECK-LABEL: @std_cabsl( +; CHECK-NEXT: [[CALL:%.*]] = tail call fp128 @cabsl(fp128 [[REAL:%.*]], fp128 [[IMAG:%.*]]) +; CHECK-NEXT: ret fp128 [[CALL]] +; %call = tail call fp128 @cabsl(fp128 %real, fp128 %imag) ret fp128 %call } define double @fast_cabs(double %real, double %imag) { -; CHECK-LABEL: define double @fast_cabs( -; CHECK: %1 = fmul fast double %real, %real -; CHECK: %2 = fmul fast double %imag, %imag -; CHECK: %3 = fadd fast double %1, %2 -; CHECK: %cabs = call fast double @llvm.sqrt.f64(double %3) -; CHECK: ret double %cabs +; CHECK-LABEL: @fast_cabs( +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[REAL:%.*]], [[REAL]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[IMAG:%.*]], [[IMAG]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd fast double [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[CABS:%.*]] = call fast double @llvm.sqrt.f64(double [[TMP3]]) +; CHECK-NEXT: ret double [[CABS]] +; %call = tail call fast double @cabs(double %real, double %imag) ret double %call } define float @fast_cabsf(float %real, float %imag) { -; CHECK-LABEL: define float @fast_cabsf( -; CHECK: %1 = fmul fast float %real, %real -; CHECK: %2 = fmul fast float %imag, %imag -; CHECK: %3 = fadd fast float %1, %2 -; CHECK: %cabs = call fast float @llvm.sqrt.f32(float %3) -; CHECK: ret float %cabs +; CHECK-LABEL: @fast_cabsf( +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[REAL:%.*]], [[REAL]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[IMAG:%.*]], [[IMAG]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd fast float [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[CABS:%.*]] = call fast float @llvm.sqrt.f32(float [[TMP3]]) +; CHECK-NEXT: ret float [[CABS]] +; %call = tail call fast float @cabsf(float %real, float %imag) ret float %call } define fp128 @fast_cabsl(fp128 %real, fp128 %imag) { -; CHECK-LABEL: define fp128 @fast_cabsl( -; CHECK: %1 = fmul fast fp128 %real, %real -; CHECK: %2 = fmul fast fp128 %imag, %imag -; CHECK: %3 = fadd fast fp128 %1, %2 -; CHECK: %cabs = call fast fp128 @llvm.sqrt.f128(fp128 %3) -; CHECK: ret fp128 %cabs +; CHECK-LABEL: @fast_cabsl( +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast fp128 [[REAL:%.*]], [[REAL]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast fp128 [[IMAG:%.*]], [[IMAG]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd fast fp128 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[CABS:%.*]] = call fast fp128 @llvm.sqrt.f128(fp128 [[TMP3]]) +; CHECK-NEXT: ret fp128 [[CABS]] +; %call = tail call fast fp128 @cabsl(fp128 %real, fp128 %imag) ret fp128 %call } diff --git a/llvm/test/Transforms/InstCombine/fabs-libcall.ll b/llvm/test/Transforms/InstCombine/fabs-libcall.ll index 90902bb2fd044e..609529ed3a1b20 100644 --- a/llvm/test/Transforms/InstCombine/fabs-libcall.ll +++ b/llvm/test/Transforms/InstCombine/fabs-libcall.ll @@ -1,11 +1,12 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -mtriple=i686-apple-macosx -instcombine %s | 
FileCheck %s declare x86_fp80 @fabsl(x86_fp80) define x86_fp80 @replace_fabs_call_f80(x86_fp80 %x) { ; CHECK-LABEL: @replace_fabs_call_f80( -; CHECK-NEXT: [[TMP1:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 %x) -; CHECK-NEXT: ret x86_fp80 [[TMP1]] +; CHECK-NEXT: [[FABSL:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[X:%.*]]) +; CHECK-NEXT: ret x86_fp80 [[FABSL]] ; %fabsl = tail call x86_fp80 @fabsl(x86_fp80 %x) ret x86_fp80 %fabsl @@ -13,8 +14,8 @@ define x86_fp80 @replace_fabs_call_f80(x86_fp80 %x) { define x86_fp80 @fmf_replace_fabs_call_f80(x86_fp80 %x) { ; CHECK-LABEL: @fmf_replace_fabs_call_f80( -; CHECK-NEXT: [[TMP1:%.*]] = call nnan x86_fp80 @llvm.fabs.f80(x86_fp80 %x) -; CHECK-NEXT: ret x86_fp80 [[TMP1]] +; CHECK-NEXT: [[FABSL:%.*]] = call nnan x86_fp80 @llvm.fabs.f80(x86_fp80 [[X:%.*]]) +; CHECK-NEXT: ret x86_fp80 [[FABSL]] ; %fabsl = tail call nnan x86_fp80 @fabsl(x86_fp80 %x) ret x86_fp80 %fabsl diff --git a/llvm/test/Transforms/InstCombine/objsize.ll b/llvm/test/Transforms/InstCombine/objsize.ll index 15f6b44a3a0ea9..ad37fa2a0860a2 100644 --- a/llvm/test/Transforms/InstCombine/objsize.ll +++ b/llvm/test/Transforms/InstCombine/objsize.ll @@ -112,7 +112,7 @@ define void @test3() nounwind { ; CHECK: bb11: ; CHECK-NEXT: unreachable ; CHECK: bb12: -; CHECK-NEXT: [[TMP0:%.*]] = call i8* @__inline_memcpy_chk(i8* bitcast (float* getelementptr inbounds ([480 x float], [480 x float]* @array, i32 0, i32 1) to i8*), i8* undef, i32 512) #3 +; CHECK-NEXT: [[TMP0:%.*]] = call i8* @__inline_memcpy_chk(i8* bitcast (float* getelementptr inbounds ([480 x float], [480 x float]* @array, i32 0, i32 1) to i8*), i8* undef, i32 512) [[ATTR3:#.*]] ; CHECK-NEXT: unreachable ; entry: @@ -141,7 +141,7 @@ define i32 @test4(i8** %esc) nounwind ssp { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = alloca [[STRUCT_DATA:%.*]], align 8 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast %struct.data* [[TMP0]] to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* nonnull align 8 dereferenceable(1824) [[TMP1]], i8 0, i32 1824, i1 false) #0 +; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* nonnull align 8 dereferenceable(1824) [[TMP1]], i8 0, i32 1824, i1 false) [[ATTR0:#.*]] ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8** [[ESC:%.*]] to %struct.data** ; CHECK-NEXT: store %struct.data* [[TMP0]], %struct.data** [[TMP2]], align 4 ; CHECK-NEXT: ret i32 0 @@ -161,9 +161,9 @@ entry: define i8* @test5(i32 %n) nounwind ssp { ; CHECK-LABEL: @test5( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = tail call noalias dereferenceable_or_null(20) i8* @malloc(i32 20) #0 +; CHECK-NEXT: [[TMP0:%.*]] = tail call noalias dereferenceable_or_null(20) i8* @malloc(i32 20) [[ATTR0]] ; CHECK-NEXT: [[TMP1:%.*]] = load i8*, i8** @s, align 8 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* nonnull align 1 dereferenceable(10) [[TMP0]], i8* nonnull align 1 dereferenceable(10) [[TMP1]], i32 10, i1 false) #0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* nonnull align 1 dereferenceable(10) [[TMP0]], i8* nonnull align 1 dereferenceable(10) [[TMP1]], i32 10, i1 false) [[ATTR0]] ; CHECK-NEXT: ret i8* [[TMP0]] ; entry: @@ -177,9 +177,9 @@ entry: define void @test6(i32 %n) nounwind ssp { ; CHECK-LABEL: @test6( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = tail call noalias dereferenceable_or_null(20) i8* @malloc(i32 20) #0 +; CHECK-NEXT: [[TMP0:%.*]] = tail call noalias dereferenceable_or_null(20) i8* @malloc(i32 20) [[ATTR0]] ; CHECK-NEXT: [[TMP1:%.*]] = load i8*, i8** @s, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = tail call i8* 
@__memcpy_chk(i8* [[TMP0]], i8* [[TMP1]], i32 30, i32 20) #0 +; CHECK-NEXT: [[TMP2:%.*]] = tail call i8* @__memcpy_chk(i8* [[TMP0]], i8* [[TMP1]], i32 30, i32 20) [[ATTR0]] ; CHECK-NEXT: ret void ; entry: @@ -196,7 +196,7 @@ declare noalias i8* @malloc(i32) nounwind define i32 @test7(i8** %esc) { ; CHECK-LABEL: @test7( -; CHECK-NEXT: [[ALLOC:%.*]] = call noalias dereferenceable_or_null(48) i8* @malloc(i32 48) #0 +; CHECK-NEXT: [[ALLOC:%.*]] = call noalias dereferenceable_or_null(48) i8* @malloc(i32 48) [[ATTR0]] ; CHECK-NEXT: store i8* [[ALLOC]], i8** [[ESC:%.*]], align 4 ; CHECK-NEXT: ret i32 32 ; @@ -211,7 +211,7 @@ declare noalias i8* @calloc(i32, i32) nounwind define i32 @test8(i8** %esc) { ; CHECK-LABEL: @test8( -; CHECK-NEXT: [[ALLOC:%.*]] = call noalias dereferenceable_or_null(35) i8* @calloc(i32 5, i32 7) #0 +; CHECK-NEXT: [[ALLOC:%.*]] = call noalias dereferenceable_or_null(35) i8* @calloc(i32 5, i32 7) [[ATTR0]] ; CHECK-NEXT: store i8* [[ALLOC]], i8** [[ESC:%.*]], align 4 ; CHECK-NEXT: ret i32 30 ; @@ -227,7 +227,7 @@ declare noalias i8* @strndup(i8* nocapture, i32) nounwind define i32 @test9(i8** %esc) { ; CHECK-LABEL: @test9( -; CHECK-NEXT: [[CALL:%.*]] = tail call dereferenceable_or_null(8) i8* @strdup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0)) #0 +; CHECK-NEXT: [[CALL:%.*]] = tail call dereferenceable_or_null(8) i8* @strdup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0)) [[ATTR0]] ; CHECK-NEXT: store i8* [[CALL]], i8** [[ESC:%.*]], align 8 ; CHECK-NEXT: ret i32 8 ; @@ -239,7 +239,7 @@ define i32 @test9(i8** %esc) { define i32 @test10(i8** %esc) { ; CHECK-LABEL: @test10( -; CHECK-NEXT: [[CALL:%.*]] = tail call dereferenceable_or_null(4) i8* @strndup(i8* dereferenceable(8) getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 3) #0 +; CHECK-NEXT: [[CALL:%.*]] = tail call dereferenceable_or_null(4) i8* @strndup(i8* dereferenceable(8) getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 3) [[ATTR0]] ; CHECK-NEXT: store i8* [[CALL]], i8** [[ESC:%.*]], align 8 ; CHECK-NEXT: ret i32 4 ; From 734112343917a011676c2915c5e5d29803a51ba6 Mon Sep 17 00:00:00 2001 From: Konstantin Schwarz Date: Mon, 12 Oct 2020 11:45:33 +0200 Subject: [PATCH 007/123] [GlobalISel][KnownBits] Early return on out of bound shift amounts If the known shift amount is bigger than or equal to the bitwidth of the type of the value to be shifted, the result is target dependent, so don't try to infer any bits. This fixes a crash we've seen in one of our internal test suites. 
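As a self-contained illustration of the guard (a sketch only -- `ToyKnownBits`
and `knownBitsOfShl` are invented for this note, not the GISelKnownBits API),
shifting an s32 value by 32 or more, as in `G_SHL %src, 32`, must yield "no
bits known" rather than feeding the oversized amount into the bit math:

  #include <cassert>
  #include <cstdint>

  // Toy known-bits pair: each bit is proven 0, proven 1, or unknown.
  struct ToyKnownBits {
    uint64_t Zero; // bits proven to be 0
    uint64_t One;  // bits proven to be 1
  };

  // Known bits of (Val << ShiftAmt) for a BitWidth-wide value.
  ToyKnownBits knownBitsOfShl(ToyKnownBits Val, uint64_t ShiftAmt,
                              unsigned BitWidth) {
    ToyKnownBits Res{0, 0}; // all bits unknown by default
    // Guard against oversized shift amounts: the result is not defined,
    // so infer nothing. Without this early return, the code below would
    // wrongly claim the low ShiftAmt bits are known zero (and for
    // ShiftAmt >= 64 the C++ shift itself would be undefined).
    if (ShiftAmt >= BitWidth)
      return Res;
    uint64_t Mask = BitWidth == 64 ? ~0ULL : ((1ULL << BitWidth) - 1);
    Res.One = (Val.One << ShiftAmt) & Mask;
    // The shifted-in low bits are known zero.
    Res.Zero = ((Val.Zero << ShiftAmt) | ((1ULL << ShiftAmt) - 1)) & Mask;
    return Res;
  }

  int main() {
    ToyKnownBits X{0xFE, 0x01}; // bit 0 known one, bits 1-7 known zero
    ToyKnownBits R = knownBitsOfShl(X, 32, /*BitWidth=*/32);
    assert(R.Zero == 0 && R.One == 0); // nothing inferred, and no crash
    return 0;
  }

The new KnownBitsTest cases below check exactly this behavior for both the
equal-sized (32) and larger (33) shift amounts.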
Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D89232 --- .../lib/CodeGen/GlobalISel/GISelKnownBits.cpp | 4 +++ .../CodeGen/GlobalISel/KnownBitsTest.cpp | 35 +++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp index 3ebbac9fd659aa..81a89a6eb0b78f 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -397,6 +397,10 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, uint64_t Shift = RHSKnown.getConstant().getZExtValue(); LLVM_DEBUG(dbgs() << '[' << Depth << "] Shift is " << Shift << '\n'); + // Guard against oversized shift amounts + if (Shift >= MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()) + break; + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, Depth + 1); diff --git a/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp b/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp index faf6f7087ac0cd..5f1d24b1078b65 100644 --- a/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp @@ -725,3 +725,38 @@ TEST_F(AArch64GISelMITest, TestKnownBitsUMax) { EXPECT_EQ(0xffu, KnownUmax.Zero.getZExtValue()); EXPECT_EQ(0xffffffffffffff00, KnownUmax.One.getZExtValue()); } + +TEST_F(AArch64GISelMITest, TestInvalidQueries) { + StringRef MIRString = R"( + %src:_(s32) = COPY $w0 + %thirty2:_(s32) = G_CONSTANT i32 32 + %equalSized:_(s32) = G_SHL %src, %thirty2 + %copy1:_(s32) = COPY %equalSized + %thirty3:_(s32) = G_CONSTANT i32 33 + %biggerSized:_(s32) = G_SHL %src, %thirty3 + %copy2:_(s32) = COPY %biggerSized +)"; + setUp(MIRString); + if (!TM) + return; + + Register EqSizedCopyReg = Copies[Copies.size() - 2]; + MachineInstr *EqSizedCopy = MRI->getVRegDef(EqSizedCopyReg); + Register EqSizedShl = EqSizedCopy->getOperand(1).getReg(); + + Register BiggerSizedCopyReg = Copies[Copies.size() - 1]; + MachineInstr *BiggerSizedCopy = MRI->getVRegDef(BiggerSizedCopyReg); + Register BiggerSizedShl = BiggerSizedCopy->getOperand(1).getReg(); + + GISelKnownBits Info(*MF); + KnownBits EqSizeRes = Info.getKnownBits(EqSizedShl); + KnownBits BiggerSizeRes = Info.getKnownBits(BiggerSizedShl); + + + // We don't know what the result of the shift is, but we should not crash + EXPECT_TRUE(EqSizeRes.One.isNullValue()); + EXPECT_TRUE(EqSizeRes.Zero.isNullValue()); + + EXPECT_TRUE(BiggerSizeRes.One.isNullValue()); + EXPECT_TRUE(BiggerSizeRes.Zero.isNullValue()); +} From 17cec6a11a12f815052d56a17ef738cf246a2d9a Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Mon, 12 Oct 2020 18:32:25 +0200 Subject: [PATCH 008/123] Revert 1c021c64c "[SCEV] Model ptrtoint(SCEVUnknown) cast not as unknown, but as zext/trunc/self of SCEVUnknown" > While we indeed can't treat them as no-ops, i believe we can/should > do better than just modelling them as `unknown`. `inttoptr` story > is complicated, but for `ptrtoint`, it seems straight-forward > to model it just as a zext-or-trunc of unknown. 
> > This may be important now that we track towards > making inttoptr/ptrtoint casts not no-op, > and towards preventing folding them into loads/etc > (see D88979/D88789/D88788) > > Reviewed By: mkazantsev > > Differential Revision: https://reviews.llvm.org/D88806 It caused the following assert during Chromium builds: llvm/lib/IR/Constants.cpp:1868: static llvm::Constant *llvm::ConstantExpr::getTrunc(llvm::Constant *, llvm::Type *, bool): Assertion `C->getType()->isIntOrIntVectorTy() && "Trunc operand must be integer"' failed. See code review for a link to a reproducer. This reverts commit 1c021c64caef83cccb719c9bf0a2554faa6563af. --- llvm/lib/Analysis/ScalarEvolution.cpp | 43 +++---------- llvm/lib/Transforms/Utils/SimplifyIndVar.cpp | 2 +- .../add-expr-pointer-operand-sorting.ll | 4 +- .../ScalarEvolution/no-wrap-add-exprs.ll | 4 +- .../test/Analysis/ScalarEvolution/ptrtoint.ll | 60 +++++++++---------- llvm/test/CodeGen/ARM/lsr-undef-in-binop.ll | 4 +- llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll | 4 +- .../IndVarSimplify/2011-11-01-lftrptr.ll | 16 +++-- .../CodeGen/scev_looking_through_bitcasts.ll | 3 +- 9 files changed, 53 insertions(+), 87 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 036d80649110aa..1d3e26b93cb6aa 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -3505,15 +3505,15 @@ const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl &Ops) { } const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { + // We can bypass creating a target-independent + // constant expression and then folding it back into a ConstantInt. + // This is just a compile-time optimization. if (isa(AllocTy)) { Constant *NullPtr = Constant::getNullValue(AllocTy->getPointerTo()); Constant *One = ConstantInt::get(IntTy, 1); Constant *GEP = ConstantExpr::getGetElementPtr(AllocTy, NullPtr, One); - return getUnknown(ConstantExpr::getPtrToInt(GEP, IntTy)); + return getSCEV(ConstantExpr::getPtrToInt(GEP, IntTy)); } - // We can bypass creating a target-independent - // constant expression and then folding it back into a ConstantInt. - // This is just a compile-time optimization. return getConstant(IntTy, getDataLayout().getTypeAllocSize(AllocTy)); } @@ -6301,36 +6301,6 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { return getSCEV(U->getOperand(0)); break; - case Instruction::PtrToInt: { - // It's tempting to handle inttoptr and ptrtoint as no-ops, - // however this can lead to pointer expressions which cannot safely be - // expanded to GEPs because ScalarEvolution doesn't respect - // the GEP aliasing rules when simplifying integer expressions. - // - // However, given - // %x = ??? - // %y = ptrtoint %x - // %z = ptrtoint %x - // it is safe to say that %y and %z are the same thing. - // - // So instead of modelling the cast itself as unknown, - // since the casts are transparent within SCEV, - // we can at least model the casts original value as unknow instead. - - // BUT, there's caveat. If we simply model %x as unknown, unrelated uses - // of %x will also see it as unknown, which is obviously bad. - // So we can only do this iff %x would be modelled as unknown anyways. - auto *OpSCEV = getSCEV(U->getOperand(0)); - if (isa(OpSCEV)) - return getTruncateOrZeroExtend(OpSCEV, U->getType()); - // If we can model the operand, however, we must fallback to modelling - // the whole cast as unknown instead. 
- LLVM_FALLTHROUGH; - } - case Instruction::IntToPtr: - // We can't do this for inttoptr at all, however. - return getUnknown(V); - case Instruction::SDiv: // If both operands are non-negative, this is just an udiv. if (isKnownNonNegative(getSCEV(U->getOperand(0))) && @@ -6345,6 +6315,11 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { return getURemExpr(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1))); break; + // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can + // lead to pointer expressions which cannot safely be expanded to GEPs, + // because ScalarEvolution doesn't respect the GEP aliasing rules when + // simplifying integer expressions. + case Instruction::GetElementPtr: return createNodeForGEP(cast(U)); diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index 3e280a66175c88..2d71b0fff88940 100644 --- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -427,7 +427,7 @@ static bool willNotOverflow(ScalarEvolution *SE, Instruction::BinaryOps BinOp, : &ScalarEvolution::getZeroExtendExpr; // Check ext(LHS op RHS) == ext(LHS) op ext(RHS) - auto *NarrowTy = cast(SE->getEffectiveSCEVType(LHS->getType())); + auto *NarrowTy = cast(LHS->getType()); auto *WideTy = IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2); diff --git a/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll b/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll index e798e2715ba1da..93a3bf4d4c3786 100644 --- a/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll +++ b/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll @@ -33,9 +33,9 @@ define i32 @d(i32 %base) { ; CHECK-NEXT: %1 = load i32*, i32** @c, align 8 ; CHECK-NEXT: --> %1 U: full-set S: full-set Exits: <> LoopDispositions: { %for.cond: Variant } ; CHECK-NEXT: %sub.ptr.lhs.cast = ptrtoint i32* %1 to i64 -; CHECK-NEXT: --> %1 U: full-set S: full-set Exits: <> LoopDispositions: { %for.cond: Variant } +; CHECK-NEXT: --> %sub.ptr.lhs.cast U: full-set S: full-set Exits: <> LoopDispositions: { %for.cond: Variant } ; CHECK-NEXT: %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, ptrtoint ([1 x i32]* @b to i64) -; CHECK-NEXT: --> ((-1 * @b) + %1) U: full-set S: full-set Exits: <> LoopDispositions: { %for.cond: Variant } +; CHECK-NEXT: --> ((-1 * ptrtoint ([1 x i32]* @b to i64)) + %sub.ptr.lhs.cast) U: full-set S: full-set Exits: <> LoopDispositions: { %for.cond: Variant } ; CHECK-NEXT: %sub.ptr.div = sdiv exact i64 %sub.ptr.sub, 4 ; CHECK-NEXT: --> %sub.ptr.div U: full-set S: [-2305843009213693952,2305843009213693952) Exits: <> LoopDispositions: { %for.cond: Variant } ; CHECK-NEXT: %arrayidx1 = getelementptr inbounds [1 x i8], [1 x i8]* %arrayidx, i64 0, i64 %sub.ptr.div diff --git a/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll b/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll index eb669cab0c790f..5a7bb3c9e5cd54 100644 --- a/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll +++ b/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll @@ -170,14 +170,14 @@ define void @f3(i8* %x_addr, i8* %y_addr, i32* %tmp_addr) { %int5 = add i32 %int0, 5 %int.zext = zext i32 %int5 to i64 ; CHECK: %int.zext = zext i32 %int5 to i64 -; CHECK-NEXT: --> (1 + (zext i32 (4 + (trunc [16 x i8]* @z_addr to i32)) to i64)) U: [1,4294967294) S: [1,4294967297) +; CHECK-NEXT: --> (1 + (zext i32 (4 + %int0) to i64)) U: [1,4294967294) S: [1,4294967297) 
   %ptr_noalign = bitcast [16 x i8]* @z_addr_noalign to i8*
   %int0_na = ptrtoint i8* %ptr_noalign to i32
   %int5_na = add i32 %int0_na, 5
   %int.zext_na = zext i32 %int5_na to i64
 ; CHECK: %int.zext_na = zext i32 %int5_na to i64
-; CHECK-NEXT: --> (zext i32 (5 + (trunc [16 x i8]* @z_addr_noalign to i32)) to i64) U: [0,4294967296) S: [0,4294967296)
+; CHECK-NEXT: --> (zext i32 (5 + %int0_na) to i64) U: [0,4294967296) S: [0,4294967296)
   %tmp = load i32, i32* %tmp_addr
   %mul = and i32 %tmp, -4
diff --git a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll
index ac08fb24775e51..e3e9330e241f83 100644
--- a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll
+++ b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll
@@ -16,25 +16,25 @@ define void @ptrtoint(i8* %in, i64* %out0, i32* %out1, i16* %out2, i128* %out3)
 ; X64-LABEL: 'ptrtoint'
 ; X64-NEXT: Classifying expressions for: @ptrtoint
 ; X64-NEXT: %p0 = ptrtoint i8* %in to i64
-; X64-NEXT: --> %in U: full-set S: full-set
+; X64-NEXT: --> %p0 U: full-set S: full-set
 ; X64-NEXT: %p1 = ptrtoint i8* %in to i32
-; X64-NEXT: --> (trunc i8* %in to i32) U: full-set S: full-set
+; X64-NEXT: --> %p1 U: full-set S: full-set
 ; X64-NEXT: %p2 = ptrtoint i8* %in to i16
-; X64-NEXT: --> (trunc i8* %in to i16) U: full-set S: full-set
+; X64-NEXT: --> %p2 U: full-set S: full-set
 ; X64-NEXT: %p3 = ptrtoint i8* %in to i128
-; X64-NEXT: --> (zext i8* %in to i128) U: [0,18446744073709551616) S: [0,18446744073709551616)
+; X64-NEXT: --> %p3 U: [0,18446744073709551616) S: [-18446744073709551616,18446744073709551616)
 ; X64-NEXT: Determining loop execution counts for: @ptrtoint
 ;
 ; X32-LABEL: 'ptrtoint'
 ; X32-NEXT: Classifying expressions for: @ptrtoint
 ; X32-NEXT: %p0 = ptrtoint i8* %in to i64
-; X32-NEXT: --> (zext i8* %in to i64) U: [0,4294967296) S: [0,4294967296)
+; X32-NEXT: --> %p0 U: [0,4294967296) S: [-4294967296,4294967296)
 ; X32-NEXT: %p1 = ptrtoint i8* %in to i32
-; X32-NEXT: --> %in U: full-set S: full-set
+; X32-NEXT: --> %p1 U: full-set S: full-set
 ; X32-NEXT: %p2 = ptrtoint i8* %in to i16
-; X32-NEXT: --> (trunc i8* %in to i16) U: full-set S: full-set
+; X32-NEXT: --> %p2 U: full-set S: full-set
 ; X32-NEXT: %p3 = ptrtoint i8* %in to i128
-; X32-NEXT: --> (zext i8* %in to i128) U: [0,4294967296) S: [0,4294967296)
+; X32-NEXT: --> %p3 U: [0,4294967296) S: [-4294967296,4294967296)
 ; X32-NEXT: Determining loop execution counts for: @ptrtoint
 ;
   %p0 = ptrtoint i8* %in to i64
@@ -53,25 +53,25 @@ define void @ptrtoint_as1(i8 addrspace(1)* %in, i64* %out0, i32* %out1, i16* %ou
 ; X64-LABEL: 'ptrtoint_as1'
 ; X64-NEXT: Classifying expressions for: @ptrtoint_as1
 ; X64-NEXT: %p0 = ptrtoint i8 addrspace(1)* %in to i64
-; X64-NEXT: --> %in U: full-set S: full-set
+; X64-NEXT: --> %p0 U: full-set S: full-set
 ; X64-NEXT: %p1 = ptrtoint i8 addrspace(1)* %in to i32
-; X64-NEXT: --> (trunc i8 addrspace(1)* %in to i32) U: full-set S: full-set
+; X64-NEXT: --> %p1 U: full-set S: full-set
 ; X64-NEXT: %p2 = ptrtoint i8 addrspace(1)* %in to i16
-; X64-NEXT: --> (trunc i8 addrspace(1)* %in to i16) U: full-set S: full-set
+; X64-NEXT: --> %p2 U: full-set S: full-set
 ; X64-NEXT: %p3 = ptrtoint i8 addrspace(1)* %in to i128
-; X64-NEXT: --> (zext i8 addrspace(1)* %in to i128) U: [0,18446744073709551616) S: [0,18446744073709551616)
+; X64-NEXT: --> %p3 U: [0,18446744073709551616) S: [-18446744073709551616,18446744073709551616)
 ; X64-NEXT: Determining loop execution counts for: @ptrtoint_as1
 ;
 ; X32-LABEL: 'ptrtoint_as1'
 ; X32-NEXT: Classifying expressions for: @ptrtoint_as1
 ; X32-NEXT: %p0 = ptrtoint i8 addrspace(1)* %in to i64
-; X32-NEXT: --> (zext i8 addrspace(1)* %in to i64) U: [0,4294967296) S: [0,4294967296)
+; X32-NEXT: --> %p0 U: [0,4294967296) S: [-4294967296,4294967296)
 ; X32-NEXT: %p1 = ptrtoint i8 addrspace(1)* %in to i32
-; X32-NEXT: --> %in U: full-set S: full-set
+; X32-NEXT: --> %p1 U: full-set S: full-set
 ; X32-NEXT: %p2 = ptrtoint i8 addrspace(1)* %in to i16
-; X32-NEXT: --> (trunc i8 addrspace(1)* %in to i16) U: full-set S: full-set
+; X32-NEXT: --> %p2 U: full-set S: full-set
 ; X32-NEXT: %p3 = ptrtoint i8 addrspace(1)* %in to i128
-; X32-NEXT: --> (zext i8 addrspace(1)* %in to i128) U: [0,4294967296) S: [0,4294967296)
+; X32-NEXT: --> %p3 U: [0,4294967296) S: [-4294967296,4294967296)
 ; X32-NEXT: Determining loop execution counts for: @ptrtoint_as1
 ;
   %p0 = ptrtoint i8 addrspace(1)* %in to i64
@@ -92,7 +92,7 @@ define void @ptrtoint_of_bitcast(i8* %in, i64* %out0) {
 ; X64-NEXT: %in_casted = bitcast i8* %in to float*
 ; X64-NEXT: --> %in U: full-set S: full-set
 ; X64-NEXT: %p0 = ptrtoint float* %in_casted to i64
-; X64-NEXT: --> %in U: full-set S: full-set
+; X64-NEXT: --> %p0 U: full-set S: full-set
 ; X64-NEXT: Determining loop execution counts for: @ptrtoint_of_bitcast
 ;
 ; X32-LABEL: 'ptrtoint_of_bitcast'
@@ -100,7 +100,7 @@ define void @ptrtoint_of_bitcast(i8* %in, i64* %out0) {
 ; X32-NEXT: %in_casted = bitcast i8* %in to float*
 ; X32-NEXT: --> %in U: full-set S: full-set
 ; X32-NEXT: %p0 = ptrtoint float* %in_casted to i64
-; X32-NEXT: --> (zext i8* %in to i64) U: [0,4294967296) S: [0,4294967296)
+; X32-NEXT: --> %p0 U: [0,4294967296) S: [-4294967296,4294967296)
 ; X32-NEXT: Determining loop execution counts for: @ptrtoint_of_bitcast
 ;
   %in_casted = bitcast i8* %in to float*
@@ -116,7 +116,7 @@ define void @ptrtoint_of_addrspacecast(i8* %in, i64* %out0) {
 ; X64-NEXT: %in_casted = addrspacecast i8* %in to i8 addrspace(1)*
 ; X64-NEXT: --> %in_casted U: full-set S: full-set
 ; X64-NEXT: %p0 = ptrtoint i8 addrspace(1)* %in_casted to i64
-; X64-NEXT: --> %in_casted U: full-set S: full-set
+; X64-NEXT: --> %p0 U: full-set S: full-set
 ; X64-NEXT: Determining loop execution counts for: @ptrtoint_of_addrspacecast
 ;
 ; X32-LABEL: 'ptrtoint_of_addrspacecast'
@@ -124,7 +124,7 @@ define void @ptrtoint_of_addrspacecast(i8* %in, i64* %out0) {
 ; X32-NEXT: %in_casted = addrspacecast i8* %in to i8 addrspace(1)*
 ; X32-NEXT: --> %in_casted U: full-set S: full-set
 ; X32-NEXT: %p0 = ptrtoint i8 addrspace(1)* %in_casted to i64
-; X32-NEXT: --> (zext i8 addrspace(1)* %in_casted to i64) U: [0,4294967296) S: [0,4294967296)
+; X32-NEXT: --> %p0 U: [0,4294967296) S: [-4294967296,4294967296)
 ; X32-NEXT: Determining loop execution counts for: @ptrtoint_of_addrspacecast
 ;
   %in_casted = addrspacecast i8* %in to i8 addrspace(1)*
@@ -140,7 +140,7 @@ define void @ptrtoint_of_inttoptr(i64 %in, i64* %out0) {
 ; X64-NEXT: %in_casted = inttoptr i64 %in to i8*
 ; X64-NEXT: --> %in_casted U: full-set S: full-set
 ; X64-NEXT: %p0 = ptrtoint i8* %in_casted to i64
-; X64-NEXT: --> %in_casted U: full-set S: full-set
+; X64-NEXT: --> %p0 U: full-set S: full-set
 ; X64-NEXT: Determining loop execution counts for: @ptrtoint_of_inttoptr
 ;
 ; X32-LABEL: 'ptrtoint_of_inttoptr'
@@ -148,7 +148,7 @@ define void @ptrtoint_of_inttoptr(i64 %in, i64* %out0) {
 ; X32-NEXT: %in_casted = inttoptr i64 %in to i8*
 ; X32-NEXT: --> %in_casted U: full-set S: full-set
 ; X32-NEXT: %p0 = ptrtoint i8* %in_casted to i64
-; X32-NEXT: --> (zext i8* %in_casted to i64) U: [0,4294967296) S: [0,4294967296)
+; X32-NEXT: --> %p0 U: [0,4294967296) S: [-4294967296,4294967296)
 ; X32-NEXT: Determining loop execution counts for: @ptrtoint_of_inttoptr
 ;
   %in_casted = inttoptr i64 %in to i8*
@@ -197,17 +197,11 @@ define void @ptrtoint_of_nullptr(i64* %out0) {
 
 ; A constant inttoptr argument of an ptrtoint is still bad.
 define void @ptrtoint_of_constantexpr_inttoptr(i64* %out0) {
-; X64-LABEL: 'ptrtoint_of_constantexpr_inttoptr'
-; X64-NEXT: Classifying expressions for: @ptrtoint_of_constantexpr_inttoptr
-; X64-NEXT: %p0 = ptrtoint i8* inttoptr (i64 42 to i8*) to i64
-; X64-NEXT: --> inttoptr (i64 42 to i8*) U: [42,43) S: [-64,64)
-; X64-NEXT: Determining loop execution counts for: @ptrtoint_of_constantexpr_inttoptr
-;
-; X32-LABEL: 'ptrtoint_of_constantexpr_inttoptr'
-; X32-NEXT: Classifying expressions for: @ptrtoint_of_constantexpr_inttoptr
-; X32-NEXT: %p0 = ptrtoint i8* inttoptr (i64 42 to i8*) to i64
-; X32-NEXT: --> (zext i8* inttoptr (i64 42 to i8*) to i64) U: [42,43) S: [0,4294967296)
-; X32-NEXT: Determining loop execution counts for: @ptrtoint_of_constantexpr_inttoptr
+; ALL-LABEL: 'ptrtoint_of_constantexpr_inttoptr'
+; ALL-NEXT: Classifying expressions for: @ptrtoint_of_constantexpr_inttoptr
+; ALL-NEXT: %p0 = ptrtoint i8* inttoptr (i64 42 to i8*) to i64
+; ALL-NEXT: --> %p0 U: [42,43) S: [-64,64)
+; ALL-NEXT: Determining loop execution counts for: @ptrtoint_of_constantexpr_inttoptr
 ;
   %p0 = ptrtoint i8* inttoptr (i64 42 to i8*) to i64
   store i64 %p0, i64* %out0
diff --git a/llvm/test/CodeGen/ARM/lsr-undef-in-binop.ll b/llvm/test/CodeGen/ARM/lsr-undef-in-binop.ll
index e7339721447580..564328d999982c 100644
--- a/llvm/test/CodeGen/ARM/lsr-undef-in-binop.ll
+++ b/llvm/test/CodeGen/ARM/lsr-undef-in-binop.ll
@@ -186,9 +186,7 @@ define linkonce_odr i32 @vector_insert(%"class.std::__1::vector.182"*, [1 x i32]
 
   br i1 %114, label %124, label %115
 ; CHECK-LABEL: .preheader:
-; CHECK-NEXT: [[NEG_NEW:%[0-9]+]] = sub i32 0, [[NEW_CAST]]
-; CHECK-NEXT: getelementptr i8, i8* %97, i32 [[NEG_NEW]]
-
+; CHECK-NEXT: sub i32 [[OLD_CAST]], [[NEW_CAST]]
 ;