diff --git a/clang-tools-extra/clangd/ConfigFragment.h b/clang-tools-extra/clangd/ConfigFragment.h
index 7e07e9d065b37d..65772715095f20 100644
--- a/clang-tools-extra/clangd/ConfigFragment.h
+++ b/clang-tools-extra/clangd/ConfigFragment.h
@@ -51,8 +51,8 @@ template <typename T> struct Located {
       : Range(Range), Value(std::move(Value)) {}

   llvm::SMRange Range;
-  T &operator->() { return Value; }
-  const T &operator->() const { return Value; }
+  T *operator->() { return &Value; }
+  const T *operator->() const { return &Value; }
   T &operator*() { return Value; }
   const T &operator*() const { return Value; }
diff --git a/llvm/include/llvm/CodeGen/LiveRangeEdit.h b/llvm/include/llvm/CodeGen/LiveRangeEdit.h
index af8fe91431c884..87d48adc7f2791 100644
--- a/llvm/include/llvm/CodeGen/LiveRangeEdit.h
+++ b/llvm/include/llvm/CodeGen/LiveRangeEdit.h
@@ -56,14 +56,14 @@ class LiveRangeEdit : private MachineRegisterInfo::Delegate {

     /// Called when a virtual register is no longer used. Return false to defer
     /// its deletion from LiveIntervals.
-    virtual bool LRE_CanEraseVirtReg(unsigned) { return true; }
+    virtual bool LRE_CanEraseVirtReg(Register) { return true; }

     /// Called before shrinking the live range of a virtual register.
-    virtual void LRE_WillShrinkVirtReg(unsigned) {}
+    virtual void LRE_WillShrinkVirtReg(Register) {}

     /// Called after cloning a virtual register.
    /// This is used for new registers representing connected components of Old.
-    virtual void LRE_DidCloneVirtReg(unsigned New, unsigned Old) {}
+    virtual void LRE_DidCloneVirtReg(Register New, Register Old) {}
   };

 private:
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 5d5a6efab22077..68fc129cc0eda9 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1737,6 +1737,21 @@ class TargetInstrInfo : public MCInstrInfo {
     return 5;
   }

+  /// Return the maximum number of alias checks permitted on memory operands.
+  /// For instructions with more than one memory operand, the alias check on a
+  /// single MachineInstr pair has quadratic overhead and results in
+  /// unacceptable performance in the worst case. This limit clamps the number
+  /// of checks performed, which is usually the product of the memory-operand
+  /// counts of the MachineInstr pair being checked. For instance, for two
+  /// MachineInstrs with 4 and 5 memory operands respectively, a total of 20
+  /// checks would be required; with this limit set to 16, their alias check
+  /// is skipped and they are conservatively assumed to alias. We limit the
+  /// product rather than each individual instruction because targets may have
+  /// special MachineInstrs with a considerably high number of memory
+  /// operands, such as `ldm` on ARM; a per-MachineInstr limit would impose
+  /// either too much overhead or too rigid a restriction.
+  virtual unsigned getMemOperandAACheckLimit() const { return 16; }
+
   /// Return an array that contains the ids of the target indices (used for the
   /// TargetIndex machine operand) and their names.
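A quick standalone sketch of the budgeting scheme this limit feeds (it is consumed by the MachineInstr::mayAlias rewrite further down). Everything here is illustrative: mayAliasPair, MemOpsA/MemOpsB, and CheckLimit are stand-ins for the per-pair AA query, the instructions' memory-operand lists, and TII->getMemOperandAACheckLimit().

    #include <cstddef>
    #include <vector>

    // Toy per-pair query: here two "operands" alias iff they name the same
    // underlying object. The real code asks AliasAnalysis instead.
    static bool mayAliasPair(int ObjA, int ObjB) { return ObjA == ObjB; }

    // Sketch: the cost of the pairwise walk is the product of the operand
    // counts, so the product (not each count) is what gets clamped.
    static bool mayAlias(const std::vector<int> &MemOpsA,
                         const std::vector<int> &MemOpsB,
                         size_t CheckLimit /* e.g. 16 */) {
      // No memory operands means "may access anything": stay conservative.
      if (MemOpsA.empty() || MemOpsB.empty())
        return true;
      // E.g. 4 * 5 = 20 checks > 16: give up and conservatively say "alias".
      if (MemOpsA.size() * MemOpsB.size() > CheckLimit)
        return true;
      // The instructions are disjoint only if every pair is disjoint.
      for (int A : MemOpsA)
        for (int B : MemOpsB)
          if (mayAliasPair(A, B))
            return true;
      return false;
    }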
/// diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index 07f7c948d040a8..975d51c4cd1310 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -153,7 +153,7 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate { unsigned Original); bool rmFromMergeableSpills(MachineInstr &Spill, int StackSlot); void hoistAllSpills(); - void LRE_DidCloneVirtReg(unsigned, unsigned) override; + void LRE_DidCloneVirtReg(Register, Register) override; }; class InlineSpiller : public Spiller { @@ -1551,7 +1551,7 @@ void HoistSpillHelper::hoistAllSpills() { /// For VirtReg clone, the \p New register should have the same physreg or /// stackslot as the \p old register. -void HoistSpillHelper::LRE_DidCloneVirtReg(unsigned New, unsigned Old) { +void HoistSpillHelper::LRE_DidCloneVirtReg(Register New, Register Old) { if (VRM.hasPhys(Old)) VRM.assignVirt2Phys(New, VRM.getPhys(Old)); else if (VRM.getStackSlot(Old) != VirtRegMap::NO_STACK_SLOT) diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index d45c53b8181681..fd658cdb41b911 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -1276,81 +1276,96 @@ bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other, if (TII->areMemAccessesTriviallyDisjoint(*this, Other)) return false; - // FIXME: Need to handle multiple memory operands to support all targets. - if (!hasOneMemOperand() || !Other.hasOneMemOperand()) + // Memory operations without memory operands may access anything. Be + // conservative and assume `MayAlias`. + if (memoperands_empty() || Other.memoperands_empty()) return true; - MachineMemOperand *MMOa = *memoperands_begin(); - MachineMemOperand *MMOb = *Other.memoperands_begin(); - - // The following interface to AA is fashioned after DAGCombiner::isAlias - // and operates with MachineMemOperand offset with some important - // assumptions: - // - LLVM fundamentally assumes flat address spaces. - // - MachineOperand offset can *only* result from legalization and - // cannot affect queries other than the trivial case of overlap - // checking. - // - These offsets never wrap and never step outside - // of allocated objects. - // - There should never be any negative offsets here. - // - // FIXME: Modify API to hide this math from "user" - // Even before we go to AA we can reason locally about some - // memory objects. It can save compile time, and possibly catch some - // corner cases not currently covered. - - int64_t OffsetA = MMOa->getOffset(); - int64_t OffsetB = MMOb->getOffset(); - int64_t MinOffset = std::min(OffsetA, OffsetB); - - uint64_t WidthA = MMOa->getSize(); - uint64_t WidthB = MMOb->getSize(); - bool KnownWidthA = WidthA != MemoryLocation::UnknownSize; - bool KnownWidthB = WidthB != MemoryLocation::UnknownSize; - - const Value *ValA = MMOa->getValue(); - const Value *ValB = MMOb->getValue(); - bool SameVal = (ValA && ValB && (ValA == ValB)); - if (!SameVal) { - const PseudoSourceValue *PSVa = MMOa->getPseudoValue(); - const PseudoSourceValue *PSVb = MMOb->getPseudoValue(); - if (PSVa && ValB && !PSVa->mayAlias(&MFI)) - return false; - if (PSVb && ValA && !PSVb->mayAlias(&MFI)) - return false; - if (PSVa && PSVb && (PSVa == PSVb)) - SameVal = true; - } + // Skip if there are too many memory operands. 
+  auto NumChecks = getNumMemOperands() * Other.getNumMemOperands();
+  if (NumChecks > TII->getMemOperandAACheckLimit())
+    return true;
+
+  auto HasAlias = [MFI, AA, UseTBAA](const MachineMemOperand *MMOa,
+                                     const MachineMemOperand *MMOb) {
+    // The following interface to AA is fashioned after DAGCombiner::isAlias
+    // and operates with MachineMemOperand offset with some important
+    // assumptions:
+    //   - LLVM fundamentally assumes flat address spaces.
+    //   - MachineOperand offset can *only* result from legalization and
+    //     cannot affect queries other than the trivial case of overlap
+    //     checking.
+    //   - These offsets never wrap and never step outside
+    //     of allocated objects.
+    //   - There should never be any negative offsets here.
+    //
+    // FIXME: Modify API to hide this math from "user"
+    // Even before we go to AA we can reason locally about some
+    // memory objects. It can save compile time, and possibly catch some
+    // corner cases not currently covered.
+
+    int64_t OffsetA = MMOa->getOffset();
+    int64_t OffsetB = MMOb->getOffset();
+    int64_t MinOffset = std::min(OffsetA, OffsetB);
+
+    uint64_t WidthA = MMOa->getSize();
+    uint64_t WidthB = MMOb->getSize();
+    bool KnownWidthA = WidthA != MemoryLocation::UnknownSize;
+    bool KnownWidthB = WidthB != MemoryLocation::UnknownSize;
+
+    const Value *ValA = MMOa->getValue();
+    const Value *ValB = MMOb->getValue();
+    bool SameVal = (ValA && ValB && (ValA == ValB));
+    if (!SameVal) {
+      const PseudoSourceValue *PSVa = MMOa->getPseudoValue();
+      const PseudoSourceValue *PSVb = MMOb->getPseudoValue();
+      if (PSVa && ValB && !PSVa->mayAlias(&MFI))
+        return false;
+      if (PSVb && ValA && !PSVb->mayAlias(&MFI))
+        return false;
+      if (PSVa && PSVb && (PSVa == PSVb))
+        SameVal = true;
+    }
+
+    if (SameVal) {
+      if (!KnownWidthA || !KnownWidthB)
+        return true;
+      int64_t MaxOffset = std::max(OffsetA, OffsetB);
+      int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
+      return (MinOffset + LowWidth > MaxOffset);
+    }
-  if (SameVal) {
-    if (!KnownWidthA || !KnownWidthB)
+    if (!AA)
       return true;
-    int64_t MaxOffset = std::max(OffsetA, OffsetB);
-    int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
-    return (MinOffset + LowWidth > MaxOffset);
-  }
-  if (!AA)
-    return true;
+    if (!ValA || !ValB)
+      return true;
-  if (!ValA || !ValB)
-    return true;
+    assert((OffsetA >= 0) && "Negative MachineMemOperand offset");
+    assert((OffsetB >= 0) && "Negative MachineMemOperand offset");
-  assert((OffsetA >= 0) && "Negative MachineMemOperand offset");
-  assert((OffsetB >= 0) && "Negative MachineMemOperand offset");
+    int64_t OverlapA = KnownWidthA ? WidthA + OffsetA - MinOffset
+                                   : MemoryLocation::UnknownSize;
+    int64_t OverlapB = KnownWidthB ? WidthB + OffsetB - MinOffset
+                                   : MemoryLocation::UnknownSize;
-  int64_t OverlapA = KnownWidthA ? WidthA + OffsetA - MinOffset
-                                 : MemoryLocation::UnknownSize;
-  int64_t OverlapB = KnownWidthB ? WidthB + OffsetB - MinOffset
-                                 : MemoryLocation::UnknownSize;
+    AliasResult AAResult =
+        AA->alias(MemoryLocation(ValA, OverlapA,
+                                 UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
+                  MemoryLocation(ValB, OverlapB,
+                                 UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
-  AliasResult AAResult = AA->alias(
-      MemoryLocation(ValA, OverlapA,
-                     UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
-      MemoryLocation(ValB, OverlapB,
-                     UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
+    return (AAResult != NoAlias);
+  };
-  return (AAResult != NoAlias);
+  // Check each pair of memory operands from both instructions; the two
+  // instructions are known not to alias only if no pair of operands may
+  // alias.
+ for (auto *MMOa : memoperands()) + for (auto *MMOb : Other.memoperands()) + if (HasAlias(MMOa, MMOb)) + return true; + + return false; } /// hasOrderedMemoryRef - Return true if this instruction may have an ordered diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp index d49a64b3f141bb..aa749ca43e74fc 100644 --- a/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/llvm/lib/CodeGen/RegAllocBase.cpp @@ -73,7 +73,7 @@ void RegAllocBase::seedLiveRegs() { NamedRegionTimer T("seed", "Seed Live Regs", TimerGroupName, TimerGroupDescription, TimePassesIsEnabled); for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = Register::index2VirtReg(i); + Register Reg = Register::index2VirtReg(i); if (MRI->reg_nodbg_empty(Reg)) continue; enqueue(&LIS->getInterval(Reg)); @@ -143,7 +143,7 @@ void RegAllocBase::allocatePhysRegs() { if (AvailablePhysReg) Matrix->assign(*VirtReg, AvailablePhysReg); - for (unsigned Reg : SplitVRegs) { + for (Register Reg : SplitVRegs) { assert(LIS->hasInterval(Reg)); LiveInterval *SplitVirtReg = &LIS->getInterval(Reg); diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp index f96dc13132ed0c..8f2cb48c5d69b7 100644 --- a/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -72,8 +72,8 @@ class RABasic : public MachineFunctionPass, // selectOrSplit(). BitVector UsableRegs; - bool LRE_CanEraseVirtReg(unsigned) override; - void LRE_WillShrinkVirtReg(unsigned) override; + bool LRE_CanEraseVirtReg(Register) override; + void LRE_WillShrinkVirtReg(Register) override; public: RABasic(); @@ -146,7 +146,7 @@ INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix) INITIALIZE_PASS_END(RABasic, "regallocbasic", "Basic Register Allocator", false, false) -bool RABasic::LRE_CanEraseVirtReg(unsigned VirtReg) { +bool RABasic::LRE_CanEraseVirtReg(Register VirtReg) { LiveInterval &LI = LIS->getInterval(VirtReg); if (VRM->hasPhys(VirtReg)) { Matrix->unassign(LI); @@ -161,7 +161,7 @@ bool RABasic::LRE_CanEraseVirtReg(unsigned VirtReg) { return false; } -void RABasic::LRE_WillShrinkVirtReg(unsigned VirtReg) { +void RABasic::LRE_WillShrinkVirtReg(Register VirtReg) { if (!VRM->hasPhys(VirtReg)) return; diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index ecb9a5a2c53aea..166414e4ffa178 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -441,9 +441,9 @@ class RAGreedy : public MachineFunctionPass, MCRegister selectOrSplitImpl(LiveInterval &, SmallVectorImpl &, SmallVirtRegSet &, unsigned = 0); - bool LRE_CanEraseVirtReg(unsigned) override; - void LRE_WillShrinkVirtReg(unsigned) override; - void LRE_DidCloneVirtReg(unsigned, unsigned) override; + bool LRE_CanEraseVirtReg(Register) override; + void LRE_WillShrinkVirtReg(Register) override; + void LRE_DidCloneVirtReg(Register, Register) override; void enqueue(PQueue &CurQueue, LiveInterval *LI); LiveInterval *dequeue(PQueue &CurQueue); @@ -470,9 +470,9 @@ class RAGreedy : public MachineFunctionPass, bool canEvictInterferenceInRange(LiveInterval &VirtReg, MCRegister PhysReg, SlotIndex Start, SlotIndex End, EvictionCost &MaxCost); - unsigned getCheapestEvicteeWeight(const AllocationOrder &Order, - LiveInterval &VirtReg, SlotIndex Start, - SlotIndex End, float *BestEvictWeight); + MCRegister getCheapestEvicteeWeight(const AllocationOrder &Order, + LiveInterval &VirtReg, SlotIndex Start, + SlotIndex End, float *BestEvictWeight); void evictInterference(LiveInterval &, MCRegister, 
SmallVectorImpl &); bool mayRecolorAllInterferences(MCRegister PhysReg, LiveInterval &VirtReg, @@ -499,9 +499,10 @@ class RAGreedy : public MachineFunctionPass, SmallVectorImpl &NewVRegs); /// Check other options before using a callee-saved register for the first /// time. - unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, - Register PhysReg, unsigned &CostPerUseLimit, - SmallVectorImpl &NewVRegs); + MCRegister tryAssignCSRFirstTime(LiveInterval &VirtReg, + AllocationOrder &Order, MCRegister PhysReg, + unsigned &CostPerUseLimit, + SmallVectorImpl &NewVRegs); void initializeCSRCost(); unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl&); @@ -536,7 +537,7 @@ class RAGreedy : public MachineFunctionPass, using HintsInfo = SmallVector; BlockFrequency getBrokenHintFreq(const HintsInfo &, MCRegister); - void collectHintInfo(unsigned, HintsInfo &); + void collectHintInfo(Register, HintsInfo &); bool isUnusedCalleeSavedReg(MCRegister PhysReg) const; @@ -633,7 +634,7 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { // LiveRangeEdit delegate methods //===----------------------------------------------------------------------===// -bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { +bool RAGreedy::LRE_CanEraseVirtReg(Register VirtReg) { LiveInterval &LI = LIS->getInterval(VirtReg); if (VRM->hasPhys(VirtReg)) { Matrix->unassign(LI); @@ -648,7 +649,7 @@ bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { return false; } -void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) { +void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) { if (!VRM->hasPhys(VirtReg)) return; @@ -658,7 +659,7 @@ void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) { enqueue(&LI); } -void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) { +void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) { // Cloning a register we haven't even heard about yet? Just ignore it. if (!ExtraRegInfo.inBounds(Old)) return; @@ -684,9 +685,8 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // Prioritize live ranges by size, assigning larger ranges first. // The queue holds (size, reg) pairs. 
const unsigned Size = LI->getSize(); - const unsigned Reg = LI->reg(); - assert(Register::isVirtualRegister(Reg) && - "Can only enqueue virtual registers"); + const Register Reg = LI->reg(); + assert(Reg.isVirtual() && "Can only enqueue virtual registers"); unsigned Prio; ExtraRegInfo.grow(Reg); @@ -1026,17 +1026,17 @@ bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg, /// \param BestEvictweight The eviction cost of that eviction /// \return The PhysReg which is the best candidate for eviction and the /// eviction cost in BestEvictweight -unsigned RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order, - LiveInterval &VirtReg, - SlotIndex Start, SlotIndex End, - float *BestEvictweight) { +MCRegister RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order, + LiveInterval &VirtReg, + SlotIndex Start, SlotIndex End, + float *BestEvictweight) { EvictionCost BestEvictCost; BestEvictCost.setMax(); BestEvictCost.MaxWeight = VirtReg.weight(); - unsigned BestEvicteePhys = 0; + MCRegister BestEvicteePhys; // Go over all physical registers and find the best candidate for eviction - for (auto PhysReg : Order.getOrder()) { + for (MCRegister PhysReg : Order.getOrder()) { if (!canEvictInterferenceInRange(VirtReg, PhysReg, Start, End, BestEvictCost)) @@ -1498,7 +1498,7 @@ bool RAGreedy::splitCanCauseEvictionChain(Register Evictee, return false; float MaxWeight = 0; - unsigned FutureEvictedPhysReg = + MCRegister FutureEvictedPhysReg = getCheapestEvicteeWeight(Order, LIS->getInterval(Evictee), Cand.Intf.first(), Cand.Intf.last(), &MaxWeight); @@ -1559,7 +1559,7 @@ bool RAGreedy::splitCanCauseLocalSpill(unsigned VirtRegToSplit, // Check if the local interval will evict a cheaper interval. float CheapestEvictWeight = 0; - unsigned FutureEvictedPhysReg = getCheapestEvicteeWeight( + MCRegister FutureEvictedPhysReg = getCheapestEvicteeWeight( Order, LIS->getInterval(VirtRegToSplit), Cand.Intf.first(), Cand.Intf.last(), &CheapestEvictWeight); @@ -1688,7 +1688,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, // Isolate even single instructions when dealing with a proper sub-class. // That guarantees register class inflation for the stack interval because it // is all copies. - unsigned Reg = SA->getParent().reg(); + Register Reg = SA->getParent().reg(); bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); // First handle all the blocks with uses. @@ -2051,7 +2051,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, /// Get the number of allocatable registers that match the constraints of \p Reg /// on \p MI and that are also in \p SuperRC. static unsigned getNumAllocatableRegsForConstraints( - const MachineInstr *MI, unsigned Reg, const TargetRegisterClass *SuperRC, + const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, const RegisterClassInfo &RCI) { assert(SuperRC && "Invalid register class"); @@ -2791,11 +2791,10 @@ MCRegister RAGreedy::selectOrSplit(LiveInterval &VirtReg, /// Spilling a live range in the cold path can have lower cost than using /// the CSR for the first time. Returns the physical register if we decide /// to use the CSR; otherwise return 0. 
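An orientation note for these register-allocator hunks, which mostly migrate raw unsigned register numbers to the typed wrappers. A minimal sketch of the llvm::Register API the patch relies on; the describe() helper itself is made up for illustration:

    #include "llvm/CodeGen/Register.h"
    using namespace llvm;

    // Register keeps the old 'unsigned' encoding but makes intent explicit.
    void describe(Register Reg) {
      if (Reg.isVirtual()) {
        // virtReg2Index/index2VirtReg round-trip the dense virtual-register
        // index, as used by seedLiveRegs() in RegAllocBase.cpp above.
        unsigned Idx = Register::virtReg2Index(Reg);
        (void)Idx;
      } else if (Reg.isPhysical()) {
        // asMCReg() is the checked narrowing used in collectHintInfo() below;
        // it would assert on a virtual register.
        MCRegister Phys = Reg.asMCReg();
        (void)Phys;
      }
      // A default-constructed Register or MCRegister means "no register" and
      // converts to false, hence the bare 'MCRegister BestEvicteePhys;' above.
    }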
-unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg,
-                                         AllocationOrder &Order,
-                                         Register PhysReg,
-                                         unsigned &CostPerUseLimit,
-                                         SmallVectorImpl<Register> &NewVRegs) {
+MCRegister
+RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
+                                MCRegister PhysReg, unsigned &CostPerUseLimit,
+                                SmallVectorImpl<Register> &NewVRegs) {
   if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
     // We choose spill over using the CSR for the first time if the spill cost
     // is lower than CSRCost.
@@ -2860,7 +2859,7 @@ void RAGreedy::initializeCSRCost() {
 /// Collect the hint info for \p Reg.
 /// The results are stored into \p Out.
 /// \p Out is not cleared before being populated.
-void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) {
+void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
   for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
     if (!Instr.isFullCopy())
       continue;
@@ -2872,9 +2871,8 @@ void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) {
       continue;
     }
     // Get the current assignment.
-    Register OtherPhysReg = Register::isPhysicalRegister(OtherReg)
-                                ? OtherReg
-                                : Register(VRM->getPhys(OtherReg));
+    MCRegister OtherPhysReg =
+        OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
     // Push the collected information.
     Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
                            OtherPhysReg));
@@ -2906,10 +2904,10 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
   // We have a broken hint, check if it is possible to fix it by
   // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
   // some register and PhysReg may be available for the other live-ranges.
-  SmallSet<unsigned, 4> Visited;
+  SmallSet<Register, 4> Visited;
   SmallVector RecoloringCandidates;
   HintsInfo Info;
-  unsigned Reg = VirtReg.reg();
+  Register Reg = VirtReg.reg();
   MCRegister PhysReg = VRM->getPhys(Reg);
   // Start the recoloring algorithm from the input live-interval, then
   // it will propagate to the ones that are copy-related with it.
@@ -3030,7 +3028,8 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
   // First try assigning a free register.
   auto Order =
       AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
-  if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
+  if (MCRegister PhysReg =
+          tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
     // If VirtReg got an assignment, the eviction info is no longre relevant.
    LastEvicted.clearEvicteeInfo(VirtReg.reg());
     // When NewVRegs is not empty, we may have made decisions such as evicting
diff --git a/llvm/lib/CodeGen/RegAllocPBQP.cpp b/llvm/lib/CodeGen/RegAllocPBQP.cpp
index 4d610abc3dfe5f..bb2db85a090bb8 100644
--- a/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -146,12 +146,6 @@ class RegAllocPBQP : public MachineFunctionPass {
   }

 private:
-  using LI2NodeMap = std::map;
-  using Node2LIMap = std::vector;
-  using AllowedSet = std::vector;
-  using AllowedSetMap = std::vector;
-  using RegPair = std::pair;
-  using CoalesceMap = std::map;
   using RegSet = std::set;

   char *customPassID;
@@ -660,8 +654,9 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
       spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
       Worklist.insert(Worklist.end(), NewVRegs.begin(), NewVRegs.end());
       continue;
-    } else
-      VRegAllowedMap[VReg] = std::move(VRegAllowed);
+    }
+
+    VRegAllowedMap[VReg.id()] = std::move(VRegAllowed);
   }

   for (auto &KV : VRegAllowedMap) {
@@ -774,7 +769,7 @@ void RegAllocPBQP::finalizeAlloc(MachineFunction &MF,
     if (PReg == 0) {
       const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg());
       const ArrayRef<MCPhysReg> RawPRegOrder = RC.getRawAllocationOrder(MF);
-      for (unsigned CandidateReg : RawPRegOrder) {
+      for (MCRegister CandidateReg : RawPRegOrder) {
         if (!VRM.getRegInfo().isReserved(CandidateReg)) {
           PReg = CandidateReg;
           break;
diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp
index 7258feafb7e909..2b3715d02e9bdf 100644
--- a/llvm/lib/CodeGen/StackColoring.cpp
+++ b/llvm/lib/CodeGen/StackColoring.cpp
@@ -373,6 +373,36 @@ STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
 // before visiting the memcpy block (which will contain the lifetime start
 // for "b" then it will appear that 'b' has a degenerate lifetime.
 //
+// Handling Windows exceptions with LifetimeStartOnFirstUse:
+// -----------------
+//
+// There was a bug when using LifetimeStartOnFirstUse on win32:
+//    class Type1 {
+//    ...
+//    ~Type1(){ write memory;}
+//    }
+//    ...
+//    try{
+//    Type1 V
+//    ...
+//    } catch (Type2 X){
+//    ...
+//    }
+// For variable X in catch(X), we put the slot holding pX=&(&X) into
+// ConservativeSlots to prevent using LifetimeStartOnFirstUse, because pX
+// may be merged with object V, whose destructor writes memory after pX has
+// already been implicitly written. All of this happens inside the C++ EH
+// runtime (through CxxThrowException), so it cannot readily be checked at the IR level.
+//
+// The load of pX, which has no corresponding store in the IR, is usually
+// the first load MI in the EH pad, something like:
+//    bb.x.catch.i (landing-pad, ehfunclet-entry):
+//    ; predecessors: %bb...
+//      successors: %bb...
+//      %n:gr32 = MOV32rm %stack.pX ...
+//      ...
+// The Type2** slot %stack.pX will only ever be written by the EH runtime,
+// so we check StoreSlots to screen such slots out.

 namespace {
@@ -434,6 +464,9 @@ class StackColoring : public MachineFunctionPass {
   /// slots lifetime-start-on-first-use is disabled).
   BitVector ConservativeSlots;

+  /// Record the FI slots referenced by a 'may write to memory' instruction.
+  BitVector StoreSlots;
+
   /// Number of iterations taken during data flow analysis.
  unsigned NumIterations;
@@ -629,10 +662,13 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
   InterestingSlots.resize(NumSlot);
   ConservativeSlots.clear();
   ConservativeSlots.resize(NumSlot);
+  StoreSlots.clear();
+  StoreSlots.resize(NumSlot);

   // number of start and end lifetime ops for each slot
   SmallVector NumStartLifetimes(NumSlot, 0);
   SmallVector NumEndLifetimes(NumSlot, 0);
+  SmallVector NumLoadInCatchPad(NumSlot, 0);

   // Step 1: collect markers and populate the "InterestingSlots"
   // and "ConservativeSlots" sets.
@@ -687,6 +723,13 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
         if (! BetweenStartEnd.test(Slot)) {
           ConservativeSlots.set(Slot);
         }
+        // Here we check StoreSlots to screen such catch-object slots out. For
+        // more information, please refer to "Handling Windows exceptions with
+        // LifetimeStartOnFirstUse" at the head of this file.
+        if (MI.mayStore())
+          StoreSlots.set(Slot);
+        if (MF->getWinEHFuncInfo() && MBB->isEHPad() && MI.mayLoad())
+          NumLoadInCatchPad[Slot] += 1;
       }
     }
   }
@@ -697,11 +740,14 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
     return 0;
   }

-  // PR27903: slots with multiple start or end lifetime ops are not
+  // 1) PR27903: slots with multiple start or end lifetime ops are not
   // safe to enable for "lifetime-start-on-first-use".
-  for (unsigned slot = 0; slot < NumSlot; ++slot)
-    if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1)
+  // 2) Slots for variable X in catch(X) on Windows are not safe either.
+  for (unsigned slot = 0; slot < NumSlot; ++slot) {
+    if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1 ||
+        (NumLoadInCatchPad[slot] > 1 && !StoreSlots.test(slot)))
       ConservativeSlots.set(slot);
+  }

   LLVM_DEBUG(dumpBV("Conservative slots", ConservativeSlots));

   // Step 2: compute begin/end sets for each block
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 1e0be249e525ae..560e362b074b58 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -94,6 +94,14 @@ def adjust_icmp_imm : GICombineRule <

 def icmp_lowering : GICombineGroup<[adjust_icmp_imm]>;

+def extractvecelt_pairwise_add_matchdata : GIDefMatchData<"std::tuple<unsigned, LLT, Register>">;
+def extractvecelt_pairwise_add : GICombineRule<
+  (defs root:$root, extractvecelt_pairwise_add_matchdata:$matchinfo),
+  (match (wip_match_opcode G_EXTRACT_VECTOR_ELT):$root,
+         [{ return matchExtractVecEltPairwiseAdd(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyExtractVecEltPairwiseAdd(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
 // Post-legalization combines which should happen at all optimization levels.
 // (E.g. ones that facilitate matching for the selector) For example, matching
 // pseudos.
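Before the registration hunk below: roughly the IR shape that this new combine (mirroring SDAG's performExtractVectorEltCombine) rewrites; the function name is made up for illustration:

    define float @pairwise_fadd(<2 x float> %v) {
      ; Move lane 1 into lane 0, add to the original vector, take lane 0:
      %rev = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 undef>
      %sum = fadd <2 x float> %v, %rev
      %r = extractelement <2 x float> %sum, i64 0
      ; After the combine this is extract(0) + extract(1) on %v, which the new
      ; FADDPv2i32p pattern below then selects to a single 'faddp s0, v0.2s'.
      ret float %r
    }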
@@ -110,6 +118,7 @@ def AArch64PostLegalizerCombinerHelper
                        [copy_prop, erase_undef_store, combines_for_extload,
                         sext_trunc_sextload, hoist_logic_op_with_same_opcode_hands,
-                        and_trivial_mask, xor_of_and_with_same_reg]> {
+                        and_trivial_mask, xor_of_and_with_same_reg,
+                        extractvecelt_pairwise_add]> {
   let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
 }
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 79b563e345a80c..1bd9ce25125d9b 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -135,4 +135,9 @@ def : GINodeEquiv;
 def : GINodeEquiv;
 def : GINodeEquiv;
 def : GINodeEquiv;
-def : GINodeEquiv;
\ No newline at end of file
+def : GINodeEquiv;
+
+// These are patterns that we only use for GlobalISel via the importer.
+def : Pat<(f32 (fadd (vector_extract (v2f32 FPR64:$Rn), (i64 0)),
+                     (vector_extract (v2f32 FPR64:$Rn), (i64 1)))),
+          (f32 (FADDPv2i32p (v2f32 FPR64:$Rn)))>;
\ No newline at end of file
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 4f3938852a4007..17520ded4ba73b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -24,8 +24,11 @@
 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/Support/Debug.h"

@@ -33,6 +36,74 @@

 using namespace llvm;

+/// This combine tries to do what performExtractVectorEltCombine does in SDAG.
+/// Rewrite for pairwise fadd pattern
+///   (s32 (g_extract_vector_elt
+///           (g_fadd (vXs32 Other)
+///                   (g_vector_shuffle (vXs32 Other) undef <1,X,...> )) 0))
+/// ->
+///   (s32 (g_fadd (g_extract_vector_elt (vXs32 Other) 0)
+///                (g_extract_vector_elt (vXs32 Other) 1))
+bool matchExtractVecEltPairwiseAdd(
+    MachineInstr &MI, MachineRegisterInfo &MRI,
+    std::tuple<unsigned, LLT, Register> &MatchInfo) {
+  Register Src1 = MI.getOperand(1).getReg();
+  Register Src2 = MI.getOperand(2).getReg();
+  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+  auto Cst = getConstantVRegValWithLookThrough(Src2, MRI);
+  if (!Cst || Cst->Value != 0)
+    return false;
+  // SDAG also checks for FullFP16, but this looks to be beneficial anyway.
+
+  // Now check for an fadd operation. TODO: expand this for integer add?
+  auto *FAddMI = getOpcodeDef(TargetOpcode::G_FADD, Src1, MRI);
+  if (!FAddMI)
+    return false;
+
+  // If we add support for integer add, we must restrict these types to s64.
+  unsigned DstSize = DstTy.getSizeInBits();
+  if (DstSize != 16 && DstSize != 32 && DstSize != 64)
+    return false;
+
+  Register Src1Op1 = FAddMI->getOperand(1).getReg();
+  Register Src1Op2 = FAddMI->getOperand(2).getReg();
+  MachineInstr *Shuffle =
+      getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op2, MRI);
+  MachineInstr *Other = MRI.getVRegDef(Src1Op1);
+  if (!Shuffle) {
+    Shuffle = getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op1, MRI);
+    Other = MRI.getVRegDef(Src1Op2);
+  }
+
+  // We're looking for a shuffle that moves the second element to index 0.
+  if (Shuffle && Shuffle->getOperand(3).getShuffleMask()[0] == 1 &&
+      Other == MRI.getVRegDef(Shuffle->getOperand(1).getReg())) {
+    std::get<0>(MatchInfo) = TargetOpcode::G_FADD;
+    std::get<1>(MatchInfo) = DstTy;
+    std::get<2>(MatchInfo) = Other->getOperand(0).getReg();
+    return true;
+  }
+  return false;
+}
+
+bool applyExtractVecEltPairwiseAdd(
+    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
+    std::tuple<unsigned, LLT, Register> &MatchInfo) {
+  unsigned Opc = std::get<0>(MatchInfo);
+  assert(Opc == TargetOpcode::G_FADD && "Unexpected opcode!");
+  // We want to generate two extracts of elements 0 and 1, and add them.
+  LLT Ty = std::get<1>(MatchInfo);
+  Register Src = std::get<2>(MatchInfo);
+  LLT s64 = LLT::scalar(64);
+  B.setInstrAndDebugLoc(MI);
+  auto Elt0 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 0));
+  auto Elt1 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 1));
+  B.buildInstr(Opc, {MI.getOperand(0).getReg()}, {Elt0, Elt1});
+  MI.eraseFromParent();
+  return true;
+}
+
 #define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
 #include "AArch64GenPostLegalizeGICombiner.inc"
 #undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-extractvec-faddp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-extractvec-faddp.mir
new file mode 100644
index 00000000000000..790634563068a0
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-extractvec-faddp.mir
@@ -0,0 +1,188 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+---
+name: f64_faddp
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$q0' }
+body: |
+  bb.1:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: f64_faddp
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C]](s64)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[EVEC1:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C1]](s64)
+    ; CHECK: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[EVEC]], [[EVEC1]]
+    ; CHECK: $d0 = COPY [[FADD]](s64)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:_(<2 x s64>) = COPY $q0
+    %2:_(<2 x s64>) = G_IMPLICIT_DEF
+    %5:_(s64) = G_CONSTANT i64 0
+    %1:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %2, shufflemask(1, undef)
+    %3:_(<2 x s64>) = G_FADD %1, %0
+    %4:_(s64) = G_EXTRACT_VECTOR_ELT %3(<2 x s64>), %5(s64)
+    $d0 = COPY %4(s64)
+    RET_ReallyLR implicit $d0
+
+...
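As the NOTE line in this new test says, the CHECK lines are autogenerated; when the combine's output changes they are regenerated rather than hand-edited. A typical invocation looks like this (assuming a local build directory named build):

    $ llvm/utils/update_mir_test_checks.py --llc-binary build/bin/llc \
        llvm/test/CodeGen/AArch64/GlobalISel/postlegalizercombiner-extractvec-faddp.mir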
+--- +name: f64_faddp_commuted +alignment: 4 +legalized: true +tracksRegLiveness: true +liveins: + - { reg: '$q0' } +body: | + bb.1: + liveins: $q0 + + ; CHECK-LABEL: name: f64_faddp_commuted + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C]](s64) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[EVEC1:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C1]](s64) + ; CHECK: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[EVEC]], [[EVEC1]] + ; CHECK: $d0 = COPY [[FADD]](s64) + ; CHECK: RET_ReallyLR implicit $d0 + %0:_(<2 x s64>) = COPY $q0 + %2:_(<2 x s64>) = G_IMPLICIT_DEF + %5:_(s64) = G_CONSTANT i64 0 + %1:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %2, shufflemask(1, undef) + %3:_(<2 x s64>) = G_FADD %0, %1 + %4:_(s64) = G_EXTRACT_VECTOR_ELT %3(<2 x s64>), %5(s64) + $d0 = COPY %4(s64) + RET_ReallyLR implicit $d0 + +... +--- +name: f32_faddp +alignment: 4 +legalized: true +tracksRegLiveness: true +liveins: + - { reg: '$d0' } +body: | + bb.1: + liveins: $d0 + + ; CHECK-LABEL: name: f32_faddp + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s64) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C1]](s64) + ; CHECK: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[EVEC]], [[EVEC1]] + ; CHECK: $s0 = COPY [[FADD]](s32) + ; CHECK: RET_ReallyLR implicit $s0 + %0:_(<2 x s32>) = COPY $d0 + %2:_(<2 x s32>) = G_IMPLICIT_DEF + %5:_(s64) = G_CONSTANT i64 0 + %1:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %2, shufflemask(1, undef) + %3:_(<2 x s32>) = G_FADD %1, %0 + %4:_(s32) = G_EXTRACT_VECTOR_ELT %3(<2 x s32>), %5(s64) + $s0 = COPY %4(s32) + RET_ReallyLR implicit $s0 + +... +--- +name: f32_faddp_commuted +alignment: 4 +legalized: true +tracksRegLiveness: true +liveins: + - { reg: '$d0' } +body: | + bb.1: + liveins: $d0 + + ; CHECK-LABEL: name: f32_faddp_commuted + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s64) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C1]](s64) + ; CHECK: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[EVEC]], [[EVEC1]] + ; CHECK: $s0 = COPY [[FADD]](s32) + ; CHECK: RET_ReallyLR implicit $s0 + %0:_(<2 x s32>) = COPY $d0 + %2:_(<2 x s32>) = G_IMPLICIT_DEF + %5:_(s64) = G_CONSTANT i64 0 + %1:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %2, shufflemask(1, undef) + %3:_(<2 x s32>) = G_FADD %0, %1 + %4:_(s32) = G_EXTRACT_VECTOR_ELT %3(<2 x s32>), %5(s64) + $s0 = COPY %4(s32) + RET_ReallyLR implicit $s0 + +... 
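For context, the kind of source that bottoms out in this pattern is an ordinary horizontal add of a two-lane vector. A small C++ sketch using ACLE NEON intrinsics, compiled for AArch64 at -O2; the function name is illustrative:

    #include <arm_neon.h>

    // Summing both lanes of a float32x2_t is exactly the extract(0) + extract(1)
    // shape the combine produces, so it can now select to 'faddp s0, v0.2s'.
    float horizontal_add(float32x2_t V) {
      return vget_lane_f32(V, 0) + vget_lane_f32(V, 1);
    }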
+--- +name: wrong_extract_idx +alignment: 4 +legalized: true +tracksRegLiveness: true +liveins: + - { reg: '$q0' } +body: | + bb.1: + liveins: $q0 + + ; CHECK-LABEL: name: wrong_extract_idx + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s64>), [[DEF]], shufflemask(1, undef) + ; CHECK: [[FADD:%[0-9]+]]:_(<2 x s64>) = G_FADD [[SHUF]], [[COPY]] + ; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[FADD]](<2 x s64>), [[C]](s64) + ; CHECK: $d0 = COPY [[EVEC]](s64) + ; CHECK: RET_ReallyLR implicit $d0 + %0:_(<2 x s64>) = COPY $q0 + %2:_(<2 x s64>) = G_IMPLICIT_DEF + %5:_(s64) = G_CONSTANT i64 1 + %1:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %2, shufflemask(1, undef) + %3:_(<2 x s64>) = G_FADD %1, %0 + %4:_(s64) = G_EXTRACT_VECTOR_ELT %3(<2 x s64>), %5(s64) + $d0 = COPY %4(s64) + RET_ReallyLR implicit $d0 + +... +--- +name: wrong_shuffle_mask +alignment: 4 +legalized: true +tracksRegLiveness: true +liveins: + - { reg: '$q0' } +body: | + bb.1: + liveins: $q0 + + ; CHECK-LABEL: name: wrong_shuffle_mask + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s64>), [[DEF]], shufflemask(0, undef) + ; CHECK: [[FADD:%[0-9]+]]:_(<2 x s64>) = G_FADD [[SHUF]], [[COPY]] + ; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[FADD]](<2 x s64>), [[C]](s64) + ; CHECK: $d0 = COPY [[EVEC]](s64) + ; CHECK: RET_ReallyLR implicit $d0 + %0:_(<2 x s64>) = COPY $q0 + %2:_(<2 x s64>) = G_IMPLICIT_DEF + %5:_(s64) = G_CONSTANT i64 0 + %1:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %2, shufflemask(0, undef) + %3:_(<2 x s64>) = G_FADD %1, %0 + %4:_(s64) = G_EXTRACT_VECTOR_ELT %3(<2 x s64>), %5(s64) + $d0 = COPY %4(s64) + RET_ReallyLR implicit $d0 + +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-faddp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-faddp.mir new file mode 100644 index 00000000000000..770630851d1b2b --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-faddp.mir @@ -0,0 +1,62 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-unknown -run-pass=instruction-select %s -o - | FileCheck %s +--- +name: f64_faddp +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +liveins: + - { reg: '$q0' } +frameInfo: + maxAlignment: 1 +body: | + bb.1: + liveins: $q0 + + ; CHECK-LABEL: name: f64_faddp + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[FADDPv2i64p:%[0-9]+]]:fpr64 = FADDPv2i64p [[COPY]] + ; CHECK: $d0 = COPY [[FADDPv2i64p]] + ; CHECK: RET_ReallyLR implicit $d0 + %0:fpr(<2 x s64>) = COPY $q0 + %6:gpr(s64) = G_CONSTANT i64 0 + %7:fpr(s64) = G_EXTRACT_VECTOR_ELT %0(<2 x s64>), %6(s64) + %8:gpr(s64) = G_CONSTANT i64 1 + %9:fpr(s64) = G_EXTRACT_VECTOR_ELT %0(<2 x s64>), %8(s64) + %4:fpr(s64) = G_FADD %7, %9 + $d0 = COPY %4(s64) + RET_ReallyLR implicit $d0 + +... 
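For reference, the two instructions the selection tests in this file expect are the scalar pairwise-add forms of FADDP, roughly:

    faddp   d0, v0.2d    // FADDPv2i64p: d0 = v0.d[0] + v0.d[1]
    faddp   s0, v0.2s    // FADDPv2i32p: s0 = v0.s[0] + v0.s[1]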
+--- +name: f32_faddp +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +liveins: + - { reg: '$d0' } +frameInfo: + maxAlignment: 1 +body: | + bb.1: + liveins: $d0 + + ; CHECK-LABEL: name: f32_faddp + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[FADDPv2i32p:%[0-9]+]]:fpr32 = FADDPv2i32p [[COPY]] + ; CHECK: $s0 = COPY [[FADDPv2i32p]] + ; CHECK: RET_ReallyLR implicit $s0 + %0:fpr(<2 x s32>) = COPY $d0 + %6:gpr(s64) = G_CONSTANT i64 0 + %7:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %6(s64) + %8:gpr(s64) = G_CONSTANT i64 1 + %9:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %8(s64) + %4:fpr(s32) = G_FADD %7, %9 + $s0 = COPY %4(s32) + RET_ReallyLR implicit $s0 + +... diff --git a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll index 6850846fec0686..3a768d0e3f9b49 100644 --- a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll +++ b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll @@ -19,11 +19,11 @@ define void @test(%struct1* %fde, i32 %fd, void (i32, i32, i8*)* %func, i8* %arg ; A53-NEXT: mov x19, x8 ; A53-NEXT: mov w0, w1 ; A53-NEXT: mov w9, #256 +; A53-NEXT: stp x2, x3, [x8, #32] +; A53-NEXT: mov x2, x8 ; A53-NEXT: str q0, [x19, #16]! ; A53-NEXT: str w1, [x19] ; A53-NEXT: mov w1, #4 -; A53-NEXT: stp x2, x3, [x8, #32] -; A53-NEXT: mov x2, x8 ; A53-NEXT: str q0, [x8] ; A53-NEXT: strh w9, [x8, #24] ; A53-NEXT: str wzr, [x8, #20] diff --git a/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll b/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll index 9942d6df99a4b8..693f33553591ab 100644 --- a/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll +++ b/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll @@ -503,12 +503,12 @@ define void @conv_v8f16_to_i128( <8 x half> %a, i128* %store ) { ; CHECK-NEXT: vmov.32 r3, d16[1] ; CHECK-NEXT: vmov.32 r1, d16[0] ; CHECK-NEXT: subs r12, r12, #1 +; CHECK-NEXT: str r12, [r0, #12] ; CHECK-NEXT: sbcs r2, r2, #0 +; CHECK-NEXT: str r2, [r0, #8] ; CHECK-NEXT: sbcs r3, r3, #0 ; CHECK-NEXT: sbc r1, r1, #0 ; CHECK-NEXT: stm r0, {r1, r3} -; CHECK-NEXT: str r2, [r0, #8] -; CHECK-NEXT: str r12, [r0, #12] ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.1: diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll index a43f564951e93d..4fe8877aa8bd46 100644 --- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -1094,6 +1094,7 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc ; CHECK-NEXT: ldrd r11, r8, [r12, #24] ; CHECK-NEXT: vstrb.8 q0, [r9], #16 ; CHECK-NEXT: vldrw.u32 q0, [r5], #32 +; CHECK-NEXT: strd r9, r1, [sp, #24] @ 8-byte Folded Spill ; CHECK-NEXT: vldrw.u32 q1, [r5, #-28] ; CHECK-NEXT: vmul.f32 q0, q0, r7 ; CHECK-NEXT: vldrw.u32 q6, [r5, #-24] @@ -1105,13 +1106,12 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc ; CHECK-NEXT: vfma.f32 q0, q4, r6 ; CHECK-NEXT: vldrw.u32 q3, [r5, #-8] ; CHECK-NEXT: vfma.f32 q0, q5, r3 -; CHECK-NEXT: vldrw.u32 q1, [r5, #-4] -; CHECK-NEXT: vfma.f32 q0, q2, lr ; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: vfma.f32 q0, q2, lr +; CHECK-NEXT: vldrw.u32 q1, [r5, #-4] ; CHECK-NEXT: vfma.f32 q0, q3, r11 -; CHECK-NEXT: strd r9, r1, [sp, #24] @ 8-byte Folded Spill -; CHECK-NEXT: vfma.f32 q0, q1, r8 ; CHECK-NEXT: cmp r0, #16 +; CHECK-NEXT: vfma.f32 q0, q1, r8 ; CHECK-NEXT: blo .LBB16_7 ; 
CHECK-NEXT: @ %bb.5: @ %for.body.preheader ; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 diff --git a/llvm/test/CodeGen/Thumb2/mve-phireg.ll b/llvm/test/CodeGen/Thumb2/mve-phireg.ll index e7d6a7323bc1e3..0fe26fbc475346 100644 --- a/llvm/test/CodeGen/Thumb2/mve-phireg.ll +++ b/llvm/test/CodeGen/Thumb2/mve-phireg.ll @@ -168,16 +168,14 @@ define dso_local i32 @e() #0 { ; CHECK-NEXT: vmov q1, q4 ; CHECK-NEXT: vmov s1, r7 ; CHECK-NEXT: vmov.32 q1[1], r6 -; CHECK-NEXT: mov.w r10, #0 -; CHECK-NEXT: vmov.32 q1[2], r5 ; CHECK-NEXT: vmov.32 q5[0], r7 +; CHECK-NEXT: vmov.32 q1[2], r5 +; CHECK-NEXT: vmov s9, r4 ; CHECK-NEXT: vmov.32 q1[3], r4 -; CHECK-NEXT: strd r0, r10, [sp, #24] +; CHECK-NEXT: vdup.32 q6, r7 ; CHECK-NEXT: vstrw.32 q1, [sp, #76] ; CHECK-NEXT: vmov q1, q5 -; CHECK-NEXT: vmov s9, r4 ; CHECK-NEXT: vmov.32 q1[1], r7 -; CHECK-NEXT: vdup.32 q6, r7 ; CHECK-NEXT: vmov.f32 s2, s1 ; CHECK-NEXT: vmov.f32 s8, s0 ; CHECK-NEXT: vmov.32 q1[2], r6 @@ -185,6 +183,7 @@ define dso_local i32 @e() #0 { ; CHECK-NEXT: vmov q7, q6 ; CHECK-NEXT: vmov.f32 s10, s1 ; CHECK-NEXT: mov.w r8, #4 +; CHECK-NEXT: mov.w r10, #0 ; CHECK-NEXT: vmov.32 q1[3], r4 ; CHECK-NEXT: vmov.32 q3[0], r4 ; CHECK-NEXT: vmov.32 q7[1], r4 @@ -192,6 +191,7 @@ define dso_local i32 @e() #0 { ; CHECK-NEXT: vmov.f32 s11, s3 ; CHECK-NEXT: movs r1, #64 ; CHECK-NEXT: strh.w r8, [sp, #390] +; CHECK-NEXT: strd r0, r10, [sp, #24] ; CHECK-NEXT: vstrw.32 q0, [sp, #44] ; CHECK-NEXT: str r0, [r0] ; CHECK-NEXT: vstrw.32 q2, [r0] diff --git a/llvm/test/CodeGen/Thumb2/mve-vst3.ll b/llvm/test/CodeGen/Thumb2/mve-vst3.ll index 600c5279ca9173..1ae74c1738c79f 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vst3.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vst3.ll @@ -24,8 +24,8 @@ define void @vst3_v2i32(<2 x i32> *%src, <6 x i32> *%dst) { ; CHECK-NEXT: vmov.f32 s9, s6 ; CHECK-NEXT: vmov.f32 s10, s0 ; CHECK-NEXT: vmov.f32 s11, s5 -; CHECK-NEXT: strd r2, r0, [r1, #16] ; CHECK-NEXT: vstrw.32 q2, [r1] +; CHECK-NEXT: strd r2, r0, [r1, #16] ; CHECK-NEXT: pop {r4, pc} entry: %s1 = getelementptr <2 x i32>, <2 x i32>* %src, i32 0 diff --git a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll index ac1c814b838ea7..f57c9226179b5e 100644 --- a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll @@ -8,17 +8,17 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; THUMBV7-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; THUMBV7-NEXT: .pad #44 ; THUMBV7-NEXT: sub sp, #44 -; THUMBV7-NEXT: ldrd r4, r7, [sp, #88] -; THUMBV7-NEXT: mov r5, r3 ; THUMBV7-NEXT: str r0, [sp, #40] @ 4-byte Spill ; THUMBV7-NEXT: movs r0, #0 -; THUMBV7-NEXT: strd r4, r7, [sp] -; THUMBV7-NEXT: mov r1, r3 +; THUMBV7-NEXT: ldrd r4, r7, [sp, #88] +; THUMBV7-NEXT: mov r5, r3 ; THUMBV7-NEXT: strd r0, r0, [sp, #8] +; THUMBV7-NEXT: mov r1, r3 ; THUMBV7-NEXT: mov r6, r2 ; THUMBV7-NEXT: mov r0, r2 ; THUMBV7-NEXT: movs r2, #0 ; THUMBV7-NEXT: movs r3, #0 +; THUMBV7-NEXT: strd r4, r7, [sp] ; THUMBV7-NEXT: bl __multi3 ; THUMBV7-NEXT: strd r1, r0, [sp, #32] @ 8-byte Folded Spill ; THUMBV7-NEXT: strd r3, r2, [sp, #24] @ 8-byte Folded Spill diff --git a/llvm/test/CodeGen/X86/pr48064.mir b/llvm/test/CodeGen/X86/pr48064.mir new file mode 100644 index 00000000000000..8ddfdec9b5903a --- /dev/null +++ b/llvm/test/CodeGen/X86/pr48064.mir @@ -0,0 +1,435 @@ +# RUN: llc -mtriple="i386-pc-windows-msvc" -run-pass=stack-coloring %s -o - | FileCheck %s + +# There is a 
problem with the exception handler that we found on Windows when
+# LifetimeStartOnFirstUse=true is enabled by default for stack coloring. Take
+# the following case as an example:
+#
+#// Compile with "clang-cl -m32 -O2 -EHs test.cpp"
+#__attribute__((noinline,nothrow,weak)) void escape(int *p) { }
+#struct object {
+#  int i;
+#  object() {
+#    i = 1;
+#  }
+#  ~object() {
+#    // If "object" and "exp" are assigned to the same slot,
+#    // this assignment will corrupt "exp".
+#    i = 9999;
+#    escape(&i);
+#  }
+#};
+#inline void throwit() { throw 999; }
+#
+#volatile int v;
+#inline void func() {
+#  try {
+#    object o;
+#    throwit();
+#  }
+#  // "exp" is written by the OS when the "throw" occurs.
+#  // Then the destructor is called, and its store
+#  // clobbers the value of "exp".
+#  // The dereference of "exp" (with value 9999) causes a crash.
+#  // All of this is done in the runtime library, so it is hard to see at
+#  // the IR level.
+#  catch (int &exp) {
+#    v = exp;
+#  }
+#}
+#
+#int main() {
+#  func();
+#  return 0;
+#}
+
+## Make sure that o.i does not get merged with exp.i
+# CHECK: stack:
+# CHECK: id: 2, name: o.i, type: default, offset: 0, size: 4, alignment: 4,
+# CHECK: id: 3, name: exp.i, type: default, offset: 0, size: 4, alignment: 4,
+
+## Make sure that %stack.3.exp.i is not replaced with %stack.2.o.i
+# CHECK: bb.3.catch.i (landing-pad, ehfunclet-entry):
+# CHECK: %7:gr32 = MOV32rm %stack.3.exp.i, 1, $noreg, 0, $noreg :: (dereferenceable load 4 from %ir.exp.i)
+
+--- |
+  ; ModuleID = 'test-pre-stc.mir'
+  source_filename = "test.cpp"
+  target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:32-n8:16:32-a:0:32-S32"
+
+  %rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+  %eh.CatchableType = type { i32, i8*, i32, i32, i32, i32, i8* }
+  %eh.CatchableTypeArray.1 = type { i32, [1 x %eh.CatchableType*] }
+  %eh.ThrowInfo = type { i32, i8*, i8*, i8* }
+  %CXXExceptionRegistration = type { i8*, %EHRegistrationNode, i32 }
+  %EHRegistrationNode = type { %EHRegistrationNode*, i8* }
+  %struct.object = type { i32 }
+
+  $"_R0H@8" = comdat any
+
+  $"_CT_R0H@84" = comdat any
+
+  $_CTA1H = comdat any
+
+  $_TI1H = comdat any
+
+  @v__3HC = dso_local global i32 0, align 4
+  @"_7type_info__6B@" = external constant i8*
+  @"_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"_7type_info__6B@", i8* null, [3 x i8] c".H\00" }, comdat
+  @"_CT_R0H@84" = linkonce_odr unnamed_addr constant %eh.CatchableType { i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"_R0H@8" to i8*), i32 0, i32 -1, i32 0, i32 4, i8* null }, section ".xdata", comdat
+  @_CTA1H = linkonce_odr unnamed_addr constant %eh.CatchableTypeArray.1 { i32 1, [1 x %eh.CatchableType*] [%eh.CatchableType* @"_CT_R0H@84"] }, section ".xdata", comdat
+  @_TI1H = linkonce_odr unnamed_addr constant %eh.ThrowInfo { i32 0, i8* null, i8* null, i8* bitcast (%eh.CatchableTypeArray.1* @_CTA1H to i8*) }, section ".xdata", comdat
+
+  ; Function Attrs: noinline nounwind sspstrong
+  define weak dso_local void @"escape__YAXPAH@Z"(i32* %p) local_unnamed_addr #0 {
+  entry:
+    ret void
+  }
+
+  ; Function Attrs: norecurse sspstrong
+  define dso_local i32 @main() local_unnamed_addr #1 personality i32 (...)* @__CxxFrameHandler3 {
+  entry:
+    %0 = alloca %CXXExceptionRegistration, align 4
+    %1 = bitcast %CXXExceptionRegistration* %0 to i8*
+    call void @llvm.x86.seh.ehregnode(i8* %1)
+    %2 = call i8* @llvm.stacksave()
+    %3 = getelementptr inbounds %CXXExceptionRegistration, %CXXExceptionRegistration* %0, i32 0, i32 0
+    store i8* %2, i8** %3, align 4
+    %4 = getelementptr inbounds %CXXExceptionRegistration,
%CXXExceptionRegistration* %0, i32 0, i32 2 + store i32 -1, i32* %4, align 4 + %5 = getelementptr inbounds %CXXExceptionRegistration, %CXXExceptionRegistration* %0, i32 0, i32 1 + %6 = getelementptr inbounds %EHRegistrationNode, %EHRegistrationNode* %5, i32 0, i32 1 + store i8* bitcast (i32 (i8*, i8*, i8*, i8*)* @"__ehhandler$main" to i8*), i8** %6, align 4 + %7 = load %EHRegistrationNode*, %EHRegistrationNode* addrspace(257)* null, align 4 + %8 = getelementptr inbounds %EHRegistrationNode, %EHRegistrationNode* %5, i32 0, i32 0 + store %EHRegistrationNode* %7, %EHRegistrationNode** %8, align 4 + store %EHRegistrationNode* %5, %EHRegistrationNode* addrspace(257)* null, align 4 + %tmp.i.i = alloca i32, align 4 + %o.i = alloca %struct.object, align 4 + %zx = alloca i32*, align 4 + %exp.i = alloca i32*, align 4 + %9 = bitcast i32** %exp.i to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %9) + %10 = bitcast %struct.object* %o.i to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %10) #7 + %i.i.i1 = bitcast %struct.object* %o.i to i32* + store i32 1, i32* %i.i.i1, align 4 + %11 = bitcast i32* %tmp.i.i to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %11) + store i32 999, i32* %tmp.i.i, align 4 + %12 = getelementptr inbounds %CXXExceptionRegistration, %CXXExceptionRegistration* %0, i32 0, i32 2 + store i32 1, i32* %12, align 4 + invoke void @_CxxThrowException(i8* nonnull %11, %eh.ThrowInfo* nonnull @_TI1H) #8 + to label %.noexc.i unwind label %ehcleanup.i + + .noexc.i: ; preds = %entry + unreachable + + ehcleanup.i: ; preds = %entry + %13 = cleanuppad within none [] + %14 = bitcast %struct.object* %o.i to i32* + %15 = bitcast %struct.object* %o.i to i8* + store i32 9999, i32* %14, align 4 + call void @"escape__YAXPAH@Z"(i32* nonnull %14) #7 [ "funclet"(token %13) ] + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %15) #7 + cleanupret from %13 unwind label %catch.dispatch.i + + catch.dispatch.i: ; preds = %ehcleanup.i + %16 = catchswitch within none [label %catch.i] unwind to caller + + catch.i: ; preds = %catch.dispatch.i + %17 = catchpad within %16 [%rtti.TypeDescriptor2* @"_R0H@8", i32 8, i32** %exp.i] + %18 = load i32*, i32** %exp.i, align 4 + %19 = load i32, i32* %18, align 4 + store atomic volatile i32 %19, i32* @v__3HC release, align 4 + catchret from %17 to label %func__YAXXZ.exit + + func__YAXXZ.exit: ; preds = %catch.i + %20 = bitcast i32** %exp.i to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %20) + %21 = getelementptr inbounds %CXXExceptionRegistration, %CXXExceptionRegistration* %0, i32 0, i32 1 + %22 = getelementptr inbounds %EHRegistrationNode, %EHRegistrationNode* %21, i32 0, i32 0 + %23 = load %EHRegistrationNode*, %EHRegistrationNode** %22, align 4 + store %EHRegistrationNode* %23, %EHRegistrationNode* addrspace(257)* null, align 4 + ret i32 0 + } + + ; Function Attrs: argmemonly nofree nosync nounwind willreturn + declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2 + + ; Function Attrs: nofree + declare dso_local i32 @__CxxFrameHandler3(...) #3 + + ; Function Attrs: argmemonly nofree nosync nounwind willreturn + declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2 + + ; Function Attrs: nofree + declare dso_local x86_stdcallcc void @_CxxThrowException(i8*, %eh.ThrowInfo*) local_unnamed_addr #3 + + declare i32 @_setjmp3(i8*, i32, ...) 
+ + ; Function Attrs: nofree nosync nounwind willreturn + declare i8* @llvm.stacksave() #4 + + define internal i32 @"__ehhandler$main"(i8* %0, i8* %1, i8* %2, i8* %3) #5 { + entry: + %4 = call i8* @llvm.x86.seh.lsda(i8* bitcast (i32 ()* @main to i8*)) + %5 = tail call i32 bitcast (i32 (...)* @__CxxFrameHandler3 to i32 (i8*, i8*, i8*, i8*, i8*)*)(i8* inreg %4, i8* %0, i8* %1, i8* %2, i8* %3) + ret i32 %5 + } + + ; Function Attrs: nounwind readnone + declare i8* @llvm.x86.seh.lsda(i8*) #6 + + declare x86_stdcallcc void @__CxxLongjmpUnwind(i8*) + + ; Function Attrs: nounwind + declare void @llvm.x86.seh.ehregnode(i8*) #7 + + attributes #0 = { noinline nounwind sspstrong "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #1 = { norecurse sspstrong "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #2 = { argmemonly nofree nosync nounwind willreturn } + attributes #3 = { nofree } + attributes #4 = { nofree nosync nounwind willreturn } + attributes #5 = { "safeseh" } + attributes #6 = { nounwind readnone } + attributes #7 = { nounwind } + attributes #8 = { noreturn } + + !llvm.linker.options = !{!0, !1, !2} + !llvm.module.flags = !{!3, !4} + !llvm.ident = !{!5} + + !0 = !{!"/DEFAULTLIB:libcmt.lib"} + !1 = !{!"/DEFAULTLIB:libmmt.lib"} + !2 = !{!"/DEFAULTLIB:oldnames.lib"} + !3 = !{i32 1, !"NumRegisterParameters", i32 0} + !4 = !{i32 1, !"wchar_size", i32 2} + !5 = !{!"Intel(R) oneAPI DPC++ Compiler Pro 2021.1 (YYYY.x.0.MMDD)"} + +... +--- +name: 'escape__YAXPAH@Z' +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: [] +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: + - { id: 0, type: default, offset: 0, size: 4, alignment: 4, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + RET 0 + +... 
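The screening that makes this test pass is the collectMarkers() change shown earlier. Condensed, with the names from that patch: a slot is forced conservative when it has multiple lifetime markers, or when it is loaded in a catch pad without any visible store, i.e. when it is written behind the compiler's back by the EH runtime:

    // Disable lifetime-start-on-first-use for a slot when either:
    //  1) it has multiple lifetime start/end markers (PR27903), or
    //  2) it is loaded in a catch pad but never visibly stored to -- on
    //     Windows the C++ EH runtime writes such catch-object slots directly.
    for (unsigned slot = 0; slot < NumSlot; ++slot)
      if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1 ||
          (NumLoadInCatchPad[slot] > 1 && !StoreSlots.test(slot)))
        ConservativeSlots.set(slot);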
+---
+name: main
+alignment: 16
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+registers:
+  - { id: 0, class: gr32, preferred-register: '' }
+  - { id: 1, class: gr32, preferred-register: '' }
+  - { id: 2, class: gr32, preferred-register: '' }
+  - { id: 3, class: gr32, preferred-register: '' }
+  - { id: 4, class: gr32, preferred-register: '' }
+  - { id: 5, class: gr32, preferred-register: '' }
+  - { id: 6, class: gr32, preferred-register: '' }
+  - { id: 7, class: gr32, preferred-register: '' }
+  - { id: 8, class: gr32, preferred-register: '' }
+  - { id: 9, class: gr32, preferred-register: '' }
+  - { id: 10, class: gr32, preferred-register: '' }
+liveins: []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap: false
+  hasPatchPoint: false
+  stackSize: 0
+  offsetAdjustment: 0
+  maxAlignment: 4
+  adjustsStack: false
+  hasCalls: true
+  stackProtector: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: true
+  hasVAStart: false
+  hasMustTailInVarArgFunc: false
+  localFrameSize: 0
+  savePoint: ''
+  restorePoint: ''
+fixedStack: []
+stack:
+  - { id: 0, name: zx, type: default, offset: 0, size: 16, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, name: tmp.i.i, type: default, offset: 0, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 2, name: o.i, type: default, offset: 0, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 3, name: exp.i, type: default, offset: 0, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo: {}
+body: |
+  bb.0.entry:
+    successors: %bb.1(0x7ffff800), %bb.2(0x00000800)
+
+    %0:gr32 = COPY $esp
+    MOV32mr %stack.0.zx, 1, $noreg, 0, $noreg, %0 :: (store 4 into %ir.3)
+    MOV32mi %stack.0.zx, 1, $noreg, 12, $noreg, -1 :: (store 4 into %ir.4)
+    %1:gr32 = nuw LEA32r %stack.0.zx, 1, $noreg, 4, $noreg
+    MOV32mi %stack.0.zx, 1, $noreg, 8, $noreg, @"__ehhandler$main" :: (store 4 into %ir.6)
+    %2:gr32 = MOV32rm $noreg, 1, $noreg, 0, $fs :: (load 4 from `%EHRegistrationNode* addrspace(257)* null`, addrspace 257)
+    MOV32mr %stack.0.zx, 1, $noreg, 4, $noreg, killed %2 :: (store 4 into %ir.8)
+    MOV32mr $noreg, 1, $noreg, 0, $fs, killed %1 :: (store 4 into `%EHRegistrationNode* addrspace(257)* null`, addrspace 257)
+    MOV32mi %stack.2.o.i, 1, $noreg, 0, $noreg, 1 :: (store 4 into %ir.i.i.i1)
+    MOV32mi %stack.1.tmp.i.i, 1, $noreg, 0, $noreg, 999 :: (store 4 into %ir.tmp.i.i)
+    MOV32mi %stack.0.zx, 1, $noreg, 12, $noreg, 1 :: (store 4 into %ir.12)
+    ADJCALLSTACKDOWN32 8, 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp
+    %3:gr32 = COPY $esp
+    %4:gr32 = LEA32r %stack.1.tmp.i.i, 1, $noreg, 0, $noreg
+    MOV32mr %3, 1, $noreg, 0, $noreg, killed %4 :: (store 4 into stack)
+    MOV32mi %3, 1, $noreg, 4, $noreg, @_TI1H :: (store 4 into stack + 4)
+    CALLpcrel32 @_CxxThrowException, csr_noregs, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp
+    ADJCALLSTACKUP32 8, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp
+    JMP_1 %bb.1
+
+  bb.1..noexc.i:
+    successors:
+
+  bb.2.ehcleanup.i (landing-pad, ehfunclet-entry):
+    successors: %bb.3(0x80000000)
+
+    MOV32mi %stack.2.o.i, 1, $noreg, 0, $noreg, 9999 :: (store 4 into %ir.14)
+    ADJCALLSTACKDOWN32 4, 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp
+    %5:gr32 = COPY $esp
+    %6:gr32 = LEA32r %stack.2.o.i, 1, $noreg, 0, $noreg
+    MOV32mr %5, 1, $noreg, 0, $noreg, killed %6 :: (store 4 into stack)
+    CALLpcrel32 @"escape__YAXPAH@Z", csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp
+    ADJCALLSTACKUP32 4, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp
+    CLEANUPRET
+
+  bb.3.catch.i (landing-pad, ehfunclet-entry):
+    successors: %bb.4(0x80000000)
+
+    %7:gr32 = MOV32rm %stack.3.exp.i, 1, $noreg, 0, $noreg :: (dereferenceable load 4 from %ir.exp.i)
+    %8:gr32 = MOV32rm killed %7, 1, $noreg, 0, $noreg :: (load 4 from %ir.18)
+    MOV32mr $noreg, 1, $noreg, @v__3HC, $noreg, killed %8 :: (volatile store release 4 into @v__3HC)
+    CATCHRET %bb.4, %bb.0
+
+  bb.4.catch.i (landing-pad):
+    successors: %bb.5(0x80000000)
+
+    JMP_4 %bb.5
+
+  bb.5.func__YAXXZ.exit:
+    %9:gr32 = MOV32rm %stack.0.zx, 1, $noreg, 4, $noreg :: (dereferenceable load 4 from %ir.22)
+    MOV32mr $noreg, 1, $noreg, 0, $fs, killed %9 :: (store 4 into `%EHRegistrationNode* addrspace(257)* null`, addrspace 257)
+    %10:gr32 = MOV32r0 implicit-def dead $eflags
+    $eax = COPY %10
+    RET 0, $eax
+
+...
+---
+name: '__ehhandler$main'
+alignment: 16
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+registers:
+  - { id: 0, class: gr32, preferred-register: '' }
+liveins: []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap: false
+  hasPatchPoint: false
+  stackSize: 0
+  offsetAdjustment: 0
+  maxAlignment: 4
+  adjustsStack: false
+  hasCalls: false
+  stackProtector: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart: false
+  hasMustTailInVarArgFunc: false
+  localFrameSize: 0
+  savePoint: ''
+  restorePoint: ''
+fixedStack:
+  - { id: 0, type: default, offset: 0, size: 4, alignment: 4, stack-id: default,
+      isImmutable: false, isAliased: false, callee-saved-register: '',
+      callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+  - { id: 1, type: default, offset: 4, size: 4, alignment: 4, stack-id: default,
+      isImmutable: false, isAliased: false, callee-saved-register: '',
+      callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+  - { id: 2, type: default, offset: 8, size: 4, alignment: 4, stack-id: default,
+      isImmutable: false, isAliased: false, callee-saved-register: '',
+      callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+  - { id: 3, type: default, offset: 12, size: 4, alignment: 4, stack-id: default,
+      isImmutable: false, isAliased: false, callee-saved-register: '',
+      callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+stack: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo: {}
+body: |
+  bb.0.entry:
+    %0:gr32 = MOV32ri
+    $eax = COPY %0
+    TCRETURNdi @__CxxFrameHandler3, 0, csr_32, implicit $esp, implicit $ssp, implicit $eax
+
+...
diff --git a/llvm/test/CodeGen/X86/store_op_load_fold2.ll b/llvm/test/CodeGen/X86/store_op_load_fold2.ll
index 674b8d8f93842a..6f088772436ec5 100644
--- a/llvm/test/CodeGen/X86/store_op_load_fold2.ll
+++ b/llvm/test/CodeGen/X86/store_op_load_fold2.ll
@@ -17,13 +17,12 @@ cond_true2732.preheader:		; preds = %entry
 	store i64 %tmp2676.us.us, i64* %tmp2666
 	ret i32 0
 
-; INTEL: and {{e..}}, dword ptr [356]
-; INTEL: and dword ptr [360], {{e..}}
-; FIXME: mov dword ptr [356], {{e..}}
-; The above line comes out as 'mov 360, eax', but when the register is ecx it works?
+; INTEL-DAG: and {{e..}}, dword ptr [356]
+; INTEL-DAG: and dword ptr [360], {{e..}}
+; INTEL: mov dword ptr [356], {{e..}}
 
-; ATT: andl 356, %{{e..}}
-; ATT: andl %{{e..}}, 360
+; ATT-DAG: andl 356, %{{e..}}
+; ATT-DAG: andl %{{e..}}, 360
 ; ATT: movl %{{e..}}, 356
 }
 