diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 60b3f21b3e5004..073d9c86e22ff0 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -2408,20 +2408,6 @@ with ``__has_feature(cxx_constexpr_string_builtins)``. Memory builtins --------------- - * ``__builtin_memcpy_inline`` - -.. code-block:: c - - void __builtin_memcpy_inline(void *dst, const void *src, size_t size); - -``__builtin_memcpy_inline(dst, src, size)`` is identical to -``__builtin_memcpy(dst, src, size)`` except that the generated code is -guaranteed not to call any external functions. See LLVM IR `llvm.memcpy.inline -`_ Intrinsic -for more information. - -Note that the `size` argument must be a compile time constant. - Clang provides constant expression evaluation support for builtin forms of the following functions from the C standard library headers ```` and ````: @@ -2439,7 +2425,27 @@ are pointers to arrays with the same trivially copyable element type, and the given size is an exact multiple of the element size that is no greater than the number of elements accessible through the source and destination operands. -Constant evaluation support is not yet provided for ``__builtin_memcpy_inline``. +Guaranteed inlined copy +^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c + + void __builtin_memcpy_inline(void *dst, const void *src, size_t size); + + +``__builtin_memcpy_inline`` has been designed as a building block for efficient +``memcpy`` implementations. It is identical to ``__builtin_memcpy`` but also +guarantees not to call any external functions. See LLVM IR `llvm.memcpy.inline +`_ Intrinsic +for more information. + +This is useful to implement a custom version of ``memcpy``, implemement a +``libc`` memcpy or work around the absence of a ``libc``. + +Note that the `size` argument must be a compile time constant. + +Note that this intrinsic cannot yet be called in a ``constexpr`` context. + Atomic Min/Max builtins with memory ordering -------------------------------------------- diff --git a/llvm/include/llvm/MC/MCWinEH.h b/llvm/include/llvm/MC/MCWinEH.h index 53cffccce8c1ad..f05f5f1641cd04 100644 --- a/llvm/include/llvm/MC/MCWinEH.h +++ b/llvm/include/llvm/MC/MCWinEH.h @@ -26,6 +26,14 @@ struct Instruction { Instruction(unsigned Op, MCSymbol *L, unsigned Reg, unsigned Off) : Label(L), Offset(Off), Register(Reg), Operation(Op) {} + + bool operator==(const Instruction &I) const { + // Check whether two instructions refer to the same operation + // applied at a different spot (i.e. pointing at a different label). + return Offset == I.Offset && Register == I.Register && + Operation == I.Operation; + } + bool operator!=(const Instruction &I) const { return !(*this == I); } }; struct FrameInfo { diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h index 795e2770bbe185..76e217c8997453 100644 --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -17,8 +17,11 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/PassInstrumentation.h" #include "llvm/IR/PassTimingInfo.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Support/CommandLine.h" #include #include @@ -26,6 +29,7 @@ namespace llvm { class Module; +class Function; /// Instrumentation to print IR before/after passes. /// @@ -73,6 +77,53 @@ class PrintPassInstrumentation { bool DebugLogging; }; +class PreservedCFGCheckerInstrumentation { +private: + // CFG is a map BB -> {(Succ, Multiplicity)}, where BB is a non-leaf basic + // block, {(Succ, Multiplicity)} set of all pairs of the block's successors + // and the multiplicity of the edge (BB->Succ). As the mapped sets are + // unordered the order of successors is not tracked by the CFG. In other words + // this allows basic block successors to be swapped by a pass without + // reporting a CFG change. CFG can be guarded by basic block tracking pointers + // in the Graph (BBGuard). That is if any of the block is deleted or RAUWed + // then the CFG is treated poisoned and no block pointer of the Graph is used. + struct CFG { + struct BBGuard final : public CallbackVH { + BBGuard(const BasicBlock *BB) : CallbackVH(BB) {} + void deleted() override { CallbackVH::deleted(); } + void allUsesReplacedWith(Value *) override { CallbackVH::deleted(); } + bool isPoisoned() const { return !getValPtr(); } + }; + + Optional> BBGuards; + DenseMap> Graph; + + CFG(const Function *F, bool TrackBBLifetime = false); + + bool operator==(const CFG &G) const { + return !isPoisoned() && !G.isPoisoned() && Graph == G.Graph; + } + + bool isPoisoned() const { + if (BBGuards) + for (auto &BB : *BBGuards) { + if (BB.second.isPoisoned()) + return true; + } + return false; + } + + static void printDiff(raw_ostream &out, const CFG &Before, + const CFG &After); + }; + + SmallVector>, 8> GraphStackBefore; + +public: + static cl::opt VerifyPreservedCFG; + void registerCallbacks(PassInstrumentationCallbacks &PIC); +}; + /// This class provides an interface to register all the standard pass /// instrumentations and manages their state (if any). class StandardInstrumentations { @@ -80,6 +131,7 @@ class StandardInstrumentations { PrintPassInstrumentation PrintPass; TimePassesHandler TimePasses; OptNoneInstrumentation OptNone; + PreservedCFGCheckerInstrumentation PreservedCFGChecker; public: StandardInstrumentations(bool DebugLogging) : PrintPass(DebugLogging) {} diff --git a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp index cd8077e7d54865..c47ac7e17b6a17 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp @@ -258,15 +258,6 @@ void WinException::endFuncletImpl() { if (F.hasPersonalityFn()) Per = classifyEHPersonality(F.getPersonalityFn()->stripPointerCasts()); - // On funclet exit, we emit a fake "function" end marker, so that the call - // to EmitWinEHHandlerData below can calculate the size of the funclet or - // function. - if (isAArch64) { - MCSection *XData = Asm->OutStreamer->getAssociatedXDataSection( - Asm->OutStreamer->getCurrentSectionOnly()); - Asm->OutStreamer->SwitchSection(XData); - } - // Emit an UNWIND_INFO struct describing the prologue. Asm->OutStreamer->EmitWinEHHandlerData(); diff --git a/llvm/lib/MC/MCWin64EH.cpp b/llvm/lib/MC/MCWin64EH.cpp index fb0de40fc6d5f4..a585b50828379d 100644 --- a/llvm/lib/MC/MCWin64EH.cpp +++ b/llvm/lib/MC/MCWin64EH.cpp @@ -264,8 +264,7 @@ static int64_t GetAbsDifference(MCStreamer &Streamer, const MCSymbol *LHS, return value; } -static uint32_t -ARM64CountOfUnwindCodes(const std::vector &Insns) { +static uint32_t ARM64CountOfUnwindCodes(ArrayRef Insns) { uint32_t Count = 0; for (const auto &I : Insns) { switch (static_cast(I.Operation)) { @@ -544,6 +543,109 @@ FindMatchingEpilog(const std::vector& EpilogInstrs, return nullptr; } +static void simplifyOpcodes(std::vector &Instructions, + bool Reverse) { + unsigned PrevOffset = -1; + unsigned PrevRegister = -1; + + auto VisitInstruction = [&](WinEH::Instruction &Inst) { + // Convert 2-byte opcodes into equivalent 1-byte ones. + if (Inst.Operation == Win64EH::UOP_SaveRegP && Inst.Register == 29) { + Inst.Operation = Win64EH::UOP_SaveFPLR; + Inst.Register = -1; + } else if (Inst.Operation == Win64EH::UOP_SaveRegPX && + Inst.Register == 29) { + Inst.Operation = Win64EH::UOP_SaveFPLRX; + Inst.Register = -1; + } else if (Inst.Operation == Win64EH::UOP_SaveRegPX && + Inst.Register == 19 && Inst.Offset <= 248) { + Inst.Operation = Win64EH::UOP_SaveR19R20X; + Inst.Register = -1; + } else if (Inst.Operation == Win64EH::UOP_AddFP && Inst.Offset == 0) { + Inst.Operation = Win64EH::UOP_SetFP; + } else if (Inst.Operation == Win64EH::UOP_SaveRegP && + Inst.Register == PrevRegister + 2 && + Inst.Offset == PrevOffset + 16) { + Inst.Operation = Win64EH::UOP_SaveNext; + Inst.Register = -1; + Inst.Offset = 0; + // Intentionally not creating UOP_SaveNext for float register pairs, + // as current versions of Windows (up to at least 20.04) is buggy + // regarding SaveNext for float pairs. + } + // Update info about the previous instruction, for detecting if + // the next one can be made a UOP_SaveNext + if (Inst.Operation == Win64EH::UOP_SaveR19R20X) { + PrevOffset = 0; + PrevRegister = 19; + } else if (Inst.Operation == Win64EH::UOP_SaveRegPX) { + PrevOffset = 0; + PrevRegister = Inst.Register; + } else if (Inst.Operation == Win64EH::UOP_SaveRegP) { + PrevOffset = Inst.Offset; + PrevRegister = Inst.Register; + } else if (Inst.Operation == Win64EH::UOP_SaveNext) { + PrevRegister += 2; + PrevOffset += 16; + } else { + PrevRegister = -1; + PrevOffset = -1; + } + }; + + // Iterate over instructions in a forward order (for prologues), + // backwards for epilogues (i.e. always reverse compared to how the + // opcodes are stored). + if (Reverse) { + for (auto It = Instructions.rbegin(); It != Instructions.rend(); It++) + VisitInstruction(*It); + } else { + for (WinEH::Instruction &Inst : Instructions) + VisitInstruction(Inst); + } +} + +static int checkPackedEpilog(MCStreamer &streamer, WinEH::FrameInfo *info, + int PrologCodeBytes) { + // Can only pack if there's one single epilog + if (info->EpilogMap.size() != 1) + return -1; + + const std::vector &Epilog = + info->EpilogMap.begin()->second; + + // Can pack if the epilog is a subset of the prolog but not vice versa + if (Epilog.size() > info->Instructions.size()) + return -1; + + // Check that the epilog actually is a perfect match for the end (backwrds) + // of the prolog. + for (int I = Epilog.size() - 1; I >= 0; I--) { + if (info->Instructions[I] != Epilog[Epilog.size() - 1 - I]) + return -1; + } + + // Check that the epilog actually is at the very end of the function, + // otherwise it can't be packed. + uint32_t DistanceFromEnd = (uint32_t)GetAbsDifference( + streamer, info->FuncletOrFuncEnd, info->EpilogMap.begin()->first); + if (DistanceFromEnd / 4 != Epilog.size()) + return -1; + + int Offset = ARM64CountOfUnwindCodes( + ArrayRef(&info->Instructions[Epilog.size()], + info->Instructions.size() - Epilog.size())); + + // Check that the offset and prolog size fits in the first word; it's + // unclear whether the epilog count in the extension word can be taken + // as packed epilog offset. + if (Offset > 31 || PrologCodeBytes > 124) + return -1; + + info->EpilogMap.clear(); + return Offset; +} + // Populate the .xdata section. The format of .xdata on ARM64 is documented at // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) { @@ -572,6 +674,10 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) { return; } + simplifyOpcodes(info->Instructions, false); + for (auto &I : info->EpilogMap) + simplifyOpcodes(I.second, true); + MCContext &context = streamer.getContext(); MCSymbol *Label = context.createTempSymbol(); @@ -618,6 +724,8 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) { uint32_t PrologCodeBytes = ARM64CountOfUnwindCodes(info->Instructions); uint32_t TotalCodeBytes = PrologCodeBytes; + int PackedEpilogOffset = checkPackedEpilog(streamer, info, PrologCodeBytes); + // Process epilogs. MapVector EpilogInfo; // Epilogs processed so far. @@ -650,15 +758,17 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) { uint32_t CodeWordsMod = TotalCodeBytes % 4; if (CodeWordsMod) CodeWords++; - uint32_t EpilogCount = info->EpilogMap.size(); + uint32_t EpilogCount = + PackedEpilogOffset >= 0 ? PackedEpilogOffset : info->EpilogMap.size(); bool ExtensionWord = EpilogCount > 31 || TotalCodeBytes > 124; if (!ExtensionWord) { row1 |= (EpilogCount & 0x1F) << 22; row1 |= (CodeWords & 0x1F) << 27; } - // E is always 0 right now, TODO: packed epilog setup if (info->HandlesExceptions) // X row1 |= 1 << 20; + if (PackedEpilogOffset >= 0) // E + row1 |= 1 << 21; row1 |= FuncLength & 0x3FFFF; streamer.emitInt32(row1); diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index da58fa57bdae76..2ee373b912be08 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -36,6 +36,14 @@ static cl::opt cl::desc("Enable skipping optional passes optnone functions " "under new pass manager")); +cl::opt PreservedCFGCheckerInstrumentation::VerifyPreservedCFG( + "verify-cfg-preserved", cl::Hidden, +#ifdef NDEBUG + cl::init(false)); +#else + cl::init(true)); +#endif + // FIXME: Change `-debug-pass-manager` from boolean to enum type. Similar to // `-debug-pass` in legacy PM. static cl::opt @@ -338,10 +346,166 @@ void PrintPassInstrumentation::registerCallbacks( }); } +PreservedCFGCheckerInstrumentation::CFG::CFG(const Function *F, + bool TrackBBLifetime) { + if (TrackBBLifetime) + BBGuards = DenseMap(F->size()); + for (const auto &BB : *F) { + if (BBGuards) + BBGuards->try_emplace(intptr_t(&BB), &BB); + for (auto *Succ : successors(&BB)) { + Graph[&BB][Succ]++; + if (BBGuards) + BBGuards->try_emplace(intptr_t(Succ), Succ); + } + } +} + +static void printBBName(raw_ostream &out, const BasicBlock *BB) { + if (BB->hasName()) { + out << BB->getName() << "<" << BB << ">"; + return; + } + + if (!BB->getParent()) { + out << "unnamed_removed<" << BB << ">"; + return; + } + + if (BB == &BB->getParent()->getEntryBlock()) { + out << "entry" + << "<" << BB << ">"; + return; + } + + unsigned FuncOrderBlockNum = 0; + for (auto &FuncBB : *BB->getParent()) { + if (&FuncBB == BB) + break; + FuncOrderBlockNum++; + } + out << "unnamed_" << FuncOrderBlockNum << "<" << BB << ">"; +} + +void PreservedCFGCheckerInstrumentation::CFG::printDiff(raw_ostream &out, + const CFG &Before, + const CFG &After) { + assert(!After.isPoisoned()); + + // Print function name. + const CFG *FuncGraph = nullptr; + if (!After.Graph.empty()) + FuncGraph = &After; + else if (!Before.isPoisoned() && !Before.Graph.empty()) + FuncGraph = &Before; + + if (FuncGraph) + out << "In function @" + << FuncGraph->Graph.begin()->first->getParent()->getName() << "\n"; + + if (Before.isPoisoned()) { + out << "Some blocks were deleted\n"; + return; + } + + // Find and print graph differences. + if (Before.Graph.size() != After.Graph.size()) + out << "Different number of non-leaf basic blocks: before=" + << Before.Graph.size() << ", after=" << After.Graph.size() << "\n"; + + for (auto &BB : Before.Graph) { + auto BA = After.Graph.find(BB.first); + if (BA == After.Graph.end()) { + out << "Non-leaf block "; + printBBName(out, BB.first); + out << " is removed (" << BB.second.size() << " successors)\n"; + } + } + + for (auto &BA : After.Graph) { + auto BB = Before.Graph.find(BA.first); + if (BB == Before.Graph.end()) { + out << "Non-leaf block "; + printBBName(out, BA.first); + out << " is added (" << BA.second.size() << " successors)\n"; + continue; + } + + if (BB->second == BA.second) + continue; + + out << "Different successors of block "; + printBBName(out, BA.first); + out << " (unordered):\n"; + out << "- before (" << BB->second.size() << "): "; + for (auto &SuccB : BB->second) { + printBBName(out, SuccB.first); + if (SuccB.second != 1) + out << "(" << SuccB.second << "), "; + else + out << ", "; + } + out << "\n"; + out << "- after (" << BA.second.size() << "): "; + for (auto &SuccA : BA.second) { + printBBName(out, SuccA.first); + if (SuccA.second != 1) + out << "(" << SuccA.second << "), "; + else + out << ", "; + } + out << "\n"; + } +} + +void PreservedCFGCheckerInstrumentation::registerCallbacks( + PassInstrumentationCallbacks &PIC) { + if (!VerifyPreservedCFG) + return; + + PIC.registerBeforeNonSkippedPassCallback([this](StringRef P, Any IR) { + if (any_isa(IR)) + GraphStackBefore.emplace_back(P, CFG(any_cast(IR))); + else + GraphStackBefore.emplace_back(P, None); + }); + + PIC.registerAfterPassInvalidatedCallback( + [this](StringRef P, const PreservedAnalyses &PassPA) { + auto Before = GraphStackBefore.pop_back_val(); + assert(Before.first == P && + "Before and After callbacks must correspond"); + (void)Before; + }); + + PIC.registerAfterPassCallback([this](StringRef P, Any IR, + const PreservedAnalyses &PassPA) { + auto Before = GraphStackBefore.pop_back_val(); + assert(Before.first == P && "Before and After callbacks must correspond"); + auto &GraphBefore = Before.second; + + if (!PassPA.allAnalysesInSetPreserved()) + return; + + if (any_isa(IR)) { + assert(GraphBefore && "Must be built in BeforePassCallback"); + CFG GraphAfter(any_cast(IR), false /* NeedsGuard */); + if (GraphAfter == *GraphBefore) + return; + + dbgs() << "Error: " << P + << " reported it preserved CFG, but changes detected:\n"; + CFG::printDiff(dbgs(), *GraphBefore, GraphAfter); + report_fatal_error(Twine("Preserved CFG changed by ", P)); + } + }); +} + void StandardInstrumentations::registerCallbacks( PassInstrumentationCallbacks &PIC) { PrintIR.registerCallbacks(PIC); PrintPass.registerCallbacks(PIC); TimePasses.registerCallbacks(PIC); OptNone.registerCallbacks(PIC); + PreservedCFGChecker.registerCallbacks(PIC); } diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 08a29bbb3e87a0..502966c6336767 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -5725,7 +5725,7 @@ bool AArch64AsmParser::parseDirectiveSEHSaveRegX(SMLoc L) { bool AArch64AsmParser::parseDirectiveSEHSaveRegP(SMLoc L) { unsigned Reg; int64_t Offset; - if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::LR) || + if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::FP) || parseComma() || parseImmExpr(Offset)) return true; getTargetStreamer().EmitARM64WinCFISaveRegP(Reg, Offset); @@ -5737,7 +5737,7 @@ bool AArch64AsmParser::parseDirectiveSEHSaveRegP(SMLoc L) { bool AArch64AsmParser::parseDirectiveSEHSaveRegPX(SMLoc L) { unsigned Reg; int64_t Offset; - if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::X28) || + if (parseRegisterInRange(Reg, AArch64::X0, AArch64::X19, AArch64::FP) || parseComma() || parseImmExpr(Offset)) return true; getTargetStreamer().EmitARM64WinCFISaveRegPX(Reg, Offset); @@ -5789,7 +5789,7 @@ bool AArch64AsmParser::parseDirectiveSEHSaveFRegX(SMLoc L) { bool AArch64AsmParser::parseDirectiveSEHSaveFRegP(SMLoc L) { unsigned Reg; int64_t Offset; - if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D15) || + if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D14) || parseComma() || parseImmExpr(Offset)) return true; getTargetStreamer().EmitARM64WinCFISaveFRegP(Reg, Offset); @@ -5801,7 +5801,7 @@ bool AArch64AsmParser::parseDirectiveSEHSaveFRegP(SMLoc L) { bool AArch64AsmParser::parseDirectiveSEHSaveFRegPX(SMLoc L) { unsigned Reg; int64_t Offset; - if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D15) || + if (parseRegisterInRange(Reg, AArch64::D0, AArch64::D8, AArch64::D14) || parseComma() || parseImmExpr(Offset)) return true; getTargetStreamer().EmitARM64WinCFISaveFRegPX(Reg, Offset); diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index f71a2b9e003a91..e47ecb4fbb44ae 100644 --- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -199,7 +199,6 @@ static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A, case CmpInst::ICMP_SLE: case CmpInst::ICMP_SGE: Pred = CmpInst::getInversePredicate(Pred); - std::swap(A, B); Inversed = true; break; default: diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index 30726627bc8295..7ff21d7ee9ef61 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -2061,7 +2061,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, dyn_cast(AI->getArraySize())) { auto &DL = Caller->getParent()->getDataLayout(); Type *AllocaType = AI->getAllocatedType(); - uint64_t AllocaTypeSize = DL.getTypeAllocSize(AllocaType); + TypeSize AllocaTypeSize = DL.getTypeAllocSize(AllocaType); uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); // Don't add markers for zero-sized allocas. @@ -2070,9 +2070,10 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // Check that array size doesn't saturate uint64_t and doesn't // overflow when it's multiplied by type size. - if (AllocaArraySize != std::numeric_limits::max() && + if (!AllocaTypeSize.isScalable() && + AllocaArraySize != std::numeric_limits::max() && std::numeric_limits::max() / AllocaArraySize >= - AllocaTypeSize) { + AllocaTypeSize.getFixedSize()) { AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), AllocaArraySize * AllocaTypeSize); } diff --git a/llvm/test/CodeGen/AArch64/win64-jumptable.ll b/llvm/test/CodeGen/AArch64/win64-jumptable.ll index 0c61bcd52366a8..1983b2568cdee1 100644 --- a/llvm/test/CodeGen/AArch64/win64-jumptable.ll +++ b/llvm/test/CodeGen/AArch64/win64-jumptable.ll @@ -44,7 +44,6 @@ declare void @g(i32, i32) ; CHECK: .word .LBB0_3-.LJTI0_0 ; CHECK: .word .LBB0_4-.LJTI0_0 ; CHECK: .word .LBB0_5-.LJTI0_0 -; CHECK: .section .xdata,"dr" ; CHECK: .seh_handlerdata ; CHECK: .text ; CHECK: .seh_endproc diff --git a/llvm/test/CodeGen/AArch64/wineh-mingw.ll b/llvm/test/CodeGen/AArch64/wineh-mingw.ll index ff1a55711b9eae..d22c61fca75756 100644 --- a/llvm/test/CodeGen/AArch64/wineh-mingw.ll +++ b/llvm/test/CodeGen/AArch64/wineh-mingw.ll @@ -36,8 +36,7 @@ endtryfinally: ; WINEH: .seh_proc foo4 ; WINEH: .seh_handler _d_eh_personality, @unwind, @except ; WINEH: ret -; WINEH: .section .xdata,"dr" -; WINEH-NEXT: .seh_handlerdata +; WINEH: .seh_handlerdata ; WINEH-NEXT: .text ; WINEH-NEXT: .seh_endproc ; WINEH: .section .xdata,"dr" diff --git a/llvm/test/CodeGen/AArch64/wineh1.mir b/llvm/test/CodeGen/AArch64/wineh1.mir index aed1550c54f731..2f73a5291ddd09 100644 --- a/llvm/test/CodeGen/AArch64/wineh1.mir +++ b/llvm/test/CodeGen/AArch64/wineh1.mir @@ -73,7 +73,6 @@ # ASM: .seh_endepilogue # ASM: .seh_endfunclet -# ASM: .section .xdata,"dr" # ASM: .seh_handlerdata # ASM: .text # ASM: .seh_endproc diff --git a/llvm/test/CodeGen/AArch64/wineh3.mir b/llvm/test/CodeGen/AArch64/wineh3.mir index 6cbe7f42dc5ece..d1ffa4aedc0857 100644 --- a/llvm/test/CodeGen/AArch64/wineh3.mir +++ b/llvm/test/CodeGen/AArch64/wineh3.mir @@ -8,9 +8,9 @@ # CHECK-NEXT: FunctionLength: 124 # CHECK-NEXT: Version: 0 # CHECK-NEXT: ExceptionData: No -# CHECK-NEXT: EpiloguePacked: No -# CHECK-NEXT: EpilogueScopes: 1 -# CHECK-NEXT: ByteCodeLength: 32 +# CHECK-NEXT: EpiloguePacked: Yes +# CHECK-NEXT: EpilogueOffset: 0 +# CHECK-NEXT: ByteCodeLength: 16 # CHECK-NEXT: Prologue [ # CHECK-NEXT: 0xc80c ; stp x19, x20, [sp, #96] # CHECK-NEXT: 0xc88a ; stp x21, x22, [sp, #80] @@ -21,22 +21,6 @@ # CHECK-NEXT: 0xda8d ; stp d10, d11, [sp, #-112]! # CHECK-NEXT: 0xe4 ; end # CHECK-NEXT: ] -# CHECK-NEXT: EpilogueScopes [ -# CHECK-NEXT: EpilogueScope { -# CHECK-NEXT: StartOffset: 23 -# CHECK-NEXT: EpilogueStartIndex: 15 -# CHECK-NEXT: Opcodes [ -# CHECK-NEXT: 0xc80c ; ldp x19, x20, [sp, #96] -# CHECK-NEXT: 0xc88a ; ldp x21, x22, [sp, #80] -# CHECK-NEXT: 0xc908 ; ldp x23, x24, [sp, #64] -# CHECK-NEXT: 0xc986 ; ldp x25, x26, [sp, #48] -# CHECK-NEXT: 0xca04 ; ldp x27, x28, [sp, #32] -# CHECK-NEXT: 0xd802 ; ldp d8, d9, [sp, #16] -# CHECK-NEXT: 0xda8d ; ldp d10, d11, [sp], #112 -# CHECK-NEXT: 0xe4 ; end -# CHECK-NEXT: ] -# CHECK-NEXT: } -# CHECK-NEXT: ] # CHECK-NEXT: } ... --- diff --git a/llvm/test/CodeGen/AArch64/wineh6.mir b/llvm/test/CodeGen/AArch64/wineh6.mir index 95a11aa3c4e82b..e7592bd7114605 100644 --- a/llvm/test/CodeGen/AArch64/wineh6.mir +++ b/llvm/test/CodeGen/AArch64/wineh6.mir @@ -6,25 +6,19 @@ # CHECK-NEXT: FunctionLength: 92 # CHECK-NEXT: Version: 0 # CHECK-NEXT: ExceptionData: No -# CHECK-NEXT: EpiloguePacked: No -# CHECK-NEXT: EpilogueScopes: 1 -# CHECK-NEXT: ByteCodeLength: 8 +# CHECK-NEXT: EpiloguePacked: Yes +# CHECK-NEXT: EpilogueOffset: 1 +# CHECK-NEXT: ByteCodeLength: 4 # CHECK-NEXT: Prologue [ # CHECK-NEXT: 0x02 ; sub sp, #32 # CHECK-NEXT: 0xe1 ; mov fp, sp # CHECK-NEXT: 0x81 ; stp x29, x30, [sp, #-16]! # CHECK-NEXT: 0xe4 ; end # CHECK-NEXT: ] -# CHECK-NEXT: EpilogueScopes [ -# CHECK-NEXT: EpilogueScope { -# CHECK-NEXT: StartOffset: 20 -# CHECK-NEXT: EpilogueStartIndex: 4 -# CHECK-NEXT: Opcodes [ -# CHECK-NEXT: 0xe1 ; mov sp, fp -# CHECK-NEXT: 0x81 ; ldp x29, x30, [sp], #16 -# CHECK-NEXT: 0xe4 ; end -# CHECK-NEXT: ] -# CHECK-NEXT: } +# CHECK-NEXT: Epilogue [ +# CHECK-NEXT: 0xe1 ; mov sp, fp +# CHECK-NEXT: 0x81 ; ldp x29, x30, [sp], #16 +# CHECK-NEXT: 0xe4 ; end # CHECK-NEXT: ] # CHECK-NEXT: } ... diff --git a/llvm/test/CodeGen/AArch64/wineh7.mir b/llvm/test/CodeGen/AArch64/wineh7.mir index da64b3c002f3d7..6bf06d80861a42 100644 --- a/llvm/test/CodeGen/AArch64/wineh7.mir +++ b/llvm/test/CodeGen/AArch64/wineh7.mir @@ -6,9 +6,9 @@ # CHECK-NEXT: FunctionLength: 72 # CHECK-NEXT: Version: 0 # CHECK-NEXT: ExceptionData: No -# CHECK-NEXT: EpiloguePacked: No -# CHECK-NEXT: EpilogueScopes: 1 -# CHECK-NEXT: ByteCodeLength: 16 +# CHECK-NEXT: EpiloguePacked: Yes +# CHECK-NEXT: EpilogueOffset: 0 +# CHECK-NEXT: ByteCodeLength: 8 # CHECK-NEXT: Prologue [ # CHECK-NEXT: 0xe204 ; add fp, sp, #32 # CHECK-NEXT: 0x44 ; stp x29, x30, [sp, #32] @@ -16,19 +16,6 @@ # CHECK-NEXT: 0xcc85 ; stp x21, x22, [sp, #-48]! # CHECK-NEXT: 0xe4 ; end # CHECK-NEXT: ] -# CHECK-NEXT: EpilogueScopes [ -# CHECK-NEXT: EpilogueScope { -# CHECK-NEXT: StartOffset: 13 -# CHECK-NEXT: EpilogueStartIndex: 8 -# CHECK-NEXT: Opcodes [ -# CHECK-NEXT: 0xe204 ; sub sp, fp, #32 -# CHECK-NEXT: 0x44 ; ldp x29, x30, [sp, #32] -# CHECK-NEXT: 0xc802 ; ldp x19, x20, [sp, #16] -# CHECK-NEXT: 0xcc85 ; ldp x21, x22, [sp], #48 -# CHECK-NEXT: 0xe4 ; end -# CHECK-NEXT: ] -# CHECK-NEXT: } -# CHECK-NEXT: ] # CHECK-NEXT: } # CHECK-NEXT: } diff --git a/llvm/test/CodeGen/AMDGPU/sad.ll b/llvm/test/CodeGen/AMDGPU/sad.ll index 3a4a2d07772c12..464b413e655880 100644 --- a/llvm/test/CodeGen/AMDGPU/sad.ll +++ b/llvm/test/CodeGen/AMDGPU/sad.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -earlycse-debug-hash -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}v_sad_u32_pat1: ; GCN: v_sad_u32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} diff --git a/llvm/test/CodeGen/PowerPC/constants-i64.ll b/llvm/test/CodeGen/PowerPC/constants-i64.ll index 956845f5a5b359..38a765343fc74e 100644 --- a/llvm/test/CodeGen/PowerPC/constants-i64.ll +++ b/llvm/test/CodeGen/PowerPC/constants-i64.ll @@ -80,47 +80,93 @@ entry: ; CHECK: blr } -define i64 @cn32_1() #0 { +define i64 @uint32_1() #0 { entry: ret i64 3900000000 -; CHECK-LABEL: @cn32_1 +; CHECK-LABEL: @uint32_1 ; CHECK: lis [[REG1:[0-9]+]], 232 ; CHECK: ori [[REG2:[0-9]+]], [[REG1]], 30023 -; CHECK: sldi 3, [[REG1]], 8 +; CHECK: sldi 3, [[REG2]], 8 ; CHECK: blr } -define i32 @cn32_1_i32() #0 { +define i32 @uint32_1_i32() #0 { entry: ret i32 -394967296 -; CHECK-LABEL: @cn32_1_i32 +; CHECK-LABEL: @uint32_1_i32 ; CHECK: lis [[REG1:[0-9]+]], 232 ; CHECK: ori [[REG2:[0-9]+]], [[REG1]], 30023 -; CHECK: sldi 3, [[REG1]], 8 +; CHECK: sldi 3, [[REG2]], 8 ; CHECK: blr } -define i64 @cn32_2() #0 { +define i64 @uint32_2() #0 { entry: ret i64 4294967295 -; CHECK-LABEL: @cn32_2 +; CHECK-LABEL: @uint32_2 ; CHECK: li [[REG1:[0-9]+]], 0 ; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65535 -; CHECK: ori [[REG2:[0-9]+]], [[REG1]], 65535 +; CHECK: ori 3, [[REG2]], 65535 ; CHECK: blr } -define i32 @cn32_2_i32() #0 { +define i32 @uint32_2_i32() #0 { entry: ret i32 -1 -; CHECK-LABEL: @cn32_2_i32 +; CHECK-LABEL: @uint32_2_i32 ; CHECK: li [[REG1:[0-9]+]], 0 ; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65535 -; CHECK: ori [[REG2:[0-9]+]], [[REG1]], 65535 +; CHECK: ori 3, [[REG2]], 65535 +; CHECK: blr +} + +define i64 @uint32_3() #0 { +entry: + ret i64 2147483648 + +; CHECK-LABEL: @uint32_3 +; CHECK: li [[REG1:[0-9]+]], 1 +; CHECK: sldi 3, [[REG1]], 31 +; CHECK: blr +} + +define i64 @uint32_4() #0 { +entry: + ret i64 124800000032 + +; CHECK-LABEL: @uint32_4 +; CHECK: li [[REG1:[0-9]+]], 29 +; CHECK: sldi [[REG2:[0-9]+]], [[REG1]], 32 +; CHECK: oris [[REG3:[0-9]+]], [[REG2]], 3752 +; CHECK: ori 3, [[REG3]], 57376 +; CHECK: blr +} + +define i64 @cn_ones_1() #0 { +entry: + ret i64 10460594175 + +; CHECK-LABEL: @cn_ones_1 +; CHECK: li [[REG1:[0-9]+]], 2 +; CHECK: sldi [[REG2:[0-9]+]], [[REG1]], 32 +; CHECK: oris [[REG3:[0-9]+]], [[REG2]], 28543 +; CHECK: ori 3, [[REG3]], 65535 +; CHECK: blr +} + +define i64 @cn_ones_2() #0 { +entry: + ret i64 10459119615 + +; CHECK-LABEL: @cn_ones_2 +; CHECK: li [[REG1:[0-9]+]], 2 +; CHECK: sldi [[REG2:[0-9]+]], [[REG1]], 32 +; CHECK: oris [[REG3:[0-9]+]], [[REG2]], 28521 +; CHECK: ori 3, [[REG3]], 32767 ; CHECK: blr } diff --git a/llvm/test/MC/AArch64/seh-optimize.s b/llvm/test/MC/AArch64/seh-optimize.s new file mode 100644 index 00000000000000..0bf33af9cc75f6 --- /dev/null +++ b/llvm/test/MC/AArch64/seh-optimize.s @@ -0,0 +1,106 @@ +// This test checks that the unwinding opcodes are remapped to more +// efficient ones where possible. + +// RUN: llvm-mc -triple aarch64-pc-win32 -filetype=obj %s -o %t.o +// RUN: llvm-readobj -u %t.o | FileCheck %s + +// CHECK: UnwindInformation [ +// CHECK-NEXT: RuntimeFunction { +// CHECK-NEXT: Function: func +// CHECK-NEXT: ExceptionRecord: .xdata +// CHECK-NEXT: ExceptionData { +// CHECK: Prologue [ +// CHECK-NEXT: 0xd882 ; stp d10, d11, [sp, #16] +// CHECK-NEXT: 0xda07 ; stp d8, d9, [sp, #-64]! +// CHECK-NEXT: 0xe6 ; save next +// CHECK-NEXT: 0x28 ; stp x19, x20, [sp, #-64]! +// CHECK-NEXT: 0xca49 ; stp x28, x29, [sp, #72] +// CHECK-NEXT: 0xe6 ; save next +// CHECK-NEXT: 0xe6 ; save next +// CHECK-NEXT: 0xe6 ; save next +// CHECK-NEXT: 0xcc47 ; stp x20, x21, [sp, #-64]! +// CHECK-NEXT: 0x42 ; stp x29, x30, [sp, #16] +// CHECK-NEXT: 0xca02 ; stp x27, x28, [sp, #16] +// CHECK-NEXT: 0x83 ; stp x29, x30, [sp, #-32]! +// CHECK-NEXT: 0xce03 ; stp x27, x28, [sp, #-32]! +// CHECK-NEXT: 0xe1 ; mov fp, sp +// CHECK-NEXT: 0xe201 ; add fp, sp, #8 +// CHECK-NEXT: 0xe4 ; end +// CHECK-NEXT: ] +// CHECK-NEXT: EpilogueScopes [ +// CHECK-NEXT: EpilogueScope { +// CHECK: Opcodes [ +// CHECK-NEXT: 0xc904 ; ldp x23, x24, [sp, #32] +// CHECK-NEXT: 0xe6 ; restore next +// CHECK-NEXT: 0xcc83 ; ldp x21, x22, [sp], #32 +// CHECK-NEXT: 0x24 ; ldp x19, x20, [sp], #32 +// CHECK-NEXT: 0xcc1f ; ldp x19, x20, [sp], #256 +// CHECK-NEXT: 0xe4 ; end +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: ] + + + .text + .globl func + .seh_proc func +func: + add x29, sp, #8 + .seh_add_fp 8 + add x29, sp, #0 + .seh_add_fp 0 + + stp x27, x28, [sp, #-32]! + .seh_save_regp_x x27, 32 + stp x29, x30, [sp, #-32]! + .seh_save_regp_x x29, 32 + + stp x27, x28, [sp, #16] + .seh_save_regp x27, 16 + stp x29, x30, [sp, #16] + .seh_save_regp x29, 16 + + stp x20, x21, [sp, #-64]! + .seh_save_regp_x x20, 64 + stp x22, x23, [sp, #16] + .seh_save_regp x22, 16 + stp x24, x25, [sp, #32] + .seh_save_next + stp x26, x27, [sp, #48] + .seh_save_regp x26, 48 + stp x28, x29, [sp, #72] + .seh_save_regp x28, 72 + + stp x19, x20, [sp, #-64]! + .seh_save_r19r20_x 64 + stp x21, x22, [sp, #16] + .seh_save_regp x21, 16 + + stp d8, d9, [sp, #-64]! + .seh_save_fregp_x d8, 64 + stp d10, d11, [sp, #16] + // This is intentionally not converted into a save_next, to avoid + // bugs in the windows unwinder. + .seh_save_fregp d10, 16 + + .seh_endprologue + + nop + + .seh_startepilogue + ldp x27, x28, [sp, #32] + .seh_save_regp x23, 32 + ldp x23, x24, [sp, #16] + .seh_save_regp x23, 16 + ldp x21, x22, [sp], #32 + .seh_save_regp_x x21, 32 + ldp x19, x20, [sp], #32 + .seh_save_regp_x x19, 32 + ldp x19, x20, [sp], #256 + .seh_save_regp_x x19, 256 + .seh_endepilogue + ret + .seh_endproc diff --git a/llvm/test/MC/AArch64/seh-packed-epilog.s b/llvm/test/MC/AArch64/seh-packed-epilog.s new file mode 100644 index 00000000000000..f9978ea7a1139f --- /dev/null +++ b/llvm/test/MC/AArch64/seh-packed-epilog.s @@ -0,0 +1,187 @@ +// This test checks that the epilogue is packed where possible. + +// RUN: llvm-mc -triple aarch64-pc-win32 -filetype=obj %s -o %t.o +// RUN: llvm-readobj -u %t.o | FileCheck %s + +// CHECK: UnwindInformation [ +// CHECK-NEXT: RuntimeFunction { +// CHECK-NEXT: Function: func +// CHECK-NEXT: ExceptionRecord: .xdata +// CHECK-NEXT: ExceptionData { +// CHECK-NEXT: FunctionLength: +// CHECK-NEXT: Version: +// CHECK-NEXT: ExceptionData: +// CHECK-NEXT: EpiloguePacked: Yes +// CHECK-NEXT: EpilogueOffset: 2 +// CHECK-NEXT: ByteCodeLength: +// CHECK-NEXT: Prologue [ +// CHECK-NEXT: 0xdc04 ; str d8, [sp, #32] +// CHECK-NEXT: 0xe1 ; mov fp, sp +// CHECK-NEXT: 0x42 ; stp x29, x30, [sp, #16] +// CHECK-NEXT: 0x85 ; stp x29, x30, [sp, #-48]! +// CHECK-NEXT: 0xe6 ; save next +// CHECK-NEXT: 0x24 ; stp x19, x20, [sp, #-32]! +// CHECK-NEXT: 0xc842 ; stp x20, x21, [sp, #16] +// CHECK-NEXT: 0x03 ; sub sp, #48 +// CHECK-NEXT: 0xe4 ; end +// CHECK-NEXT: ] +// CHECK-NEXT: Epilogue [ +// CHECK-NEXT: 0xe1 ; mov sp, fp +// CHECK-NEXT: 0x42 ; ldp x29, x30, [sp, #16] +// CHECK-NEXT: 0x85 ; ldp x29, x30, [sp], #48 +// CHECK-NEXT: 0xe6 ; restore next +// CHECK-NEXT: 0x24 ; ldp x19, x20, [sp], #32 +// CHECK-NEXT: 0xc842 ; ldp x20, x21, [sp, #16] +// CHECK-NEXT: 0x03 ; add sp, #48 +// CHECK-NEXT: 0xe4 ; end +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK: RuntimeFunction { +// CHECK-NEXT: Function: packed2 +// CHECK-NEXT: ExceptionRecord: +// CHECK-NEXT: ExceptionData { +// CHECK: ExceptionData: +// CHECK-NEXT: EpiloguePacked: Yes +// CHECK: RuntimeFunction { +// CHECK-NEXT: Function: nonpacked1 +// CHECK-NEXT: ExceptionRecord: +// CHECK-NEXT: ExceptionData { +// CHECK: ExceptionData: +// CHECK-NEXT: EpiloguePacked: No +// CHECK: RuntimeFunction { +// CHECK-NEXT: Function: nonpacked2 +// CHECK-NEXT: ExceptionRecord: +// CHECK-NEXT: ExceptionData { +// CHECK: ExceptionData: +// CHECK-NEXT: EpiloguePacked: No +// CHECK: RuntimeFunction { +// CHECK-NEXT: Function: nonpacked3 +// CHECK-NEXT: ExceptionRecord: +// CHECK-NEXT: ExceptionData { +// CHECK: ExceptionData: +// CHECK-NEXT: EpiloguePacked: No + + .text + .globl func + .seh_proc func +func: + sub sp, sp, #48 + .seh_stackalloc 48 + // Check that canonical opcode forms (r19r20_x, fplr, fplr_x, save_next, + // set_fp) are treated as a match even if one (in prologue or epilogue) + // was simplified from the more generic opcodes. + stp x20, x21, [sp, #16] + .seh_save_regp x20, 16 + stp x19, x20, [sp, #-32]! + .seh_save_r19r20_x 32 + stp x21, x22, [sp, #16] + .seh_save_regp x21, 16 + stp x29, x30, [sp, #-48]! + .seh_save_regp_x x29, 48 + stp x29, x30, [sp, #16] + .seh_save_regp x29, 16 + add x29, sp, #0 + .seh_add_fp 0 + str d8, [sp, #32] + .seh_save_freg d8, 32 + .seh_endprologue + + nop + + .seh_startepilogue + mov sp, x29 + .seh_set_fp + ldp x29, x30, [sp, #16] + .seh_save_fplr 16 + ldp x29, x30, [sp, #-48]! + .seh_save_fplr_x 48 + ldp x21, x22, [sp, #16] + .seh_save_next + ldp x19, x20, [sp], #32 + .seh_save_regp_x x19, 32 + ldp x20, x21, [sp, #16] + .seh_save_regp x20, 16 + add sp, sp, #48 + .seh_stackalloc 48 + .seh_endepilogue + ret + .seh_endproc + + + // Test a perfectly matching epilog with no offset. + .seh_proc packed2 +packed2: + sub sp, sp, #48 + .seh_stackalloc 48 + stp x29, lr, [sp, #-32]! + .seh_save_fplr_x 32 + .seh_endprologue + nop + .seh_startepilogue + ldp x29, lr, [sp], #32 + .seh_save_fplr_x 32 + add sp, sp, #48 + .seh_stackalloc 48 + .seh_endepilogue + ret + .seh_endproc + + + .seh_proc nonpacked1 +nonpacked1: + sub sp, sp, #48 + .seh_stackalloc 48 + .seh_endprologue + + nop + .seh_startepilogue + add sp, sp, #48 + .seh_stackalloc 48 + .seh_endepilogue + // This epilogue isn't packed with the prologue, as it doesn't align with + // the end of the function (one extra nop before the ret). + nop + ret + .seh_endproc + + + .seh_proc nonpacked2 +nonpacked2: + sub sp, sp, #48 + .seh_stackalloc 48 + sub sp, sp, #32 + .seh_stackalloc 32 + .seh_endprologue + + nop + .seh_startepilogue + // Not packed; the epilogue mismatches at the second opcode. + add sp, sp, #16 + .seh_stackalloc 16 + add sp, sp, #48 + .seh_stackalloc 48 + .seh_endepilogue + ret + .seh_endproc + + .seh_proc nonpacked3 +nonpacked3: + sub sp, sp, #48 + .seh_stackalloc 48 + sub sp, sp, #32 + .seh_stackalloc 32 + .seh_endprologue + + nop + .seh_startepilogue + // Not packed; the epilogue is longer than the prologue. + mov sp, x29 + .seh_set_fp + add sp, sp, #32 + .seh_stackalloc 32 + add sp, sp, #48 + .seh_stackalloc 48 + .seh_endepilogue + ret + .seh_endproc diff --git a/llvm/test/MC/AArch64/seh.s b/llvm/test/MC/AArch64/seh.s index f7faa64b9309a8..0da956cbf2f5de 100644 --- a/llvm/test/MC/AArch64/seh.s +++ b/llvm/test/MC/AArch64/seh.s @@ -20,7 +20,7 @@ // CHECK-NEXT: } // CHECK: Section { // CHECK: Name: .xdata -// CHECK: RawDataSize: 56 +// CHECK: RawDataSize: 52 // CHECK: RelocationCount: 1 // CHECK: Characteristics [ // CHECK-NEXT: ALIGN_4BYTES @@ -41,7 +41,7 @@ // CHECK-NEXT: Relocations [ // CHECK-NEXT: Section (4) .xdata { -// CHECK-NEXT: 0x2C IMAGE_REL_ARM64_ADDR32NB __C_specific_handler +// CHECK-NEXT: 0x28 IMAGE_REL_ARM64_ADDR32NB __C_specific_handler // CHECK-NEXT: } // CHECK-NEXT: Section (5) .pdata { // CHECK-NEXT: 0x0 IMAGE_REL_ARM64_ADDR32NB func @@ -64,8 +64,8 @@ // CHECK-NEXT: 0xe202 ; add fp, sp, #16 // CHECK-NEXT: 0xdd41 ; str d13, [sp, #8] // CHECK-NEXT: 0xde83 ; str d12, [sp, #-32]! -// CHECK-NEXT: 0xd882 ; stp d10, d11, [sp, #16] -// CHECK-NEXT: 0xda03 ; stp d8, d9, [sp, #-32]! +// CHECK-NEXT: 0xd884 ; stp d10, d11, [sp, #32] +// CHECK-NEXT: 0xda05 ; stp d8, d9, [sp, #-48]! // CHECK-NEXT: 0x83 ; stp x29, x30, [sp, #-32]! // CHECK-NEXT: 0x46 ; stp x29, x30, [sp, #48] // CHECK-NEXT: 0xd141 ; str x24, [sp, #8] @@ -74,21 +74,15 @@ // CHECK-NEXT: 0xc882 ; stp x21, x22, [sp, #16] // CHECK-NEXT: 0xd6c2 ; stp x25, lr, [sp, #16] // CHECK-NEXT: 0x24 ; stp x19, x20, [sp, #-32]! -// CHECK-NEXT: 0xcc03 ; stp x19, x20, [sp, #-32]! +// CHECK-NEXT: 0xcc83 ; stp x21, x22, [sp, #-32]! // CHECK-NEXT: 0x83 ; stp x29, x30, [sp, #-32]! // CHECK-NEXT: 0xe1 ; mov fp, sp // CHECK-NEXT: 0x01 ; sub sp, #16 // CHECK-NEXT: 0xe4 ; end // CHECK-NEXT: ] -// CHECK-NEXT: EpilogueScopes [ -// CHECK-NEXT: EpilogueScope { -// CHECK-NEXT: StartOffset: 23 -// CHECK-NEXT: EpilogueStartIndex: 33 -// CHECK-NEXT: Opcodes [ -// CHECK-NEXT: 0x01 ; add sp, #16 -// CHECK-NEXT: 0xe4 ; end -// CHECK-NEXT: ] -// CHECK-NEXT: } +// CHECK-NEXT: Epilogue [ +// CHECK-NEXT: 0x01 ; add sp, #16 +// CHECK-NEXT: 0xe4 ; end // CHECK-NEXT: ] // CHECK-NEXT: ExceptionHandler [ // CHECK-NEXT: Routine: __C_specific_handler (0x0) @@ -113,8 +107,8 @@ func: .seh_set_fp stp x29, x30, [sp, #-32]! .seh_save_fplr_x 32 - stp x19, x20, [sp, #-32]! - .seh_save_regp_x x19, 32 + stp x21, x22, [sp, #-32]! + .seh_save_regp_x x21, 32 stp x19, x20, [sp, #-32]! .seh_save_r19r20_x 32 stp x25, x30, [sp, #16] @@ -131,10 +125,10 @@ func: .seh_save_fplr 48 stp x29, x30, [sp, #-32]! .seh_save_fplr_x 32 - stp d8, d9, [sp, #-32]! - .seh_save_fregp_x d8, 32 - stp d10, d11, [sp, #16] - .seh_save_fregp d10, 16 + stp d8, d9, [sp, #-48]! + .seh_save_fregp_x d8, 48 + stp d10, d11, [sp, #32] + .seh_save_fregp d10, 32 str d12, [sp, #-32]! .seh_save_freg_x d12, 32 str d13, [sp, #8] diff --git a/llvm/test/Transforms/Inline/AArch64/sve-alloca-merge.ll b/llvm/test/Transforms/Inline/AArch64/sve-alloca-merge.ll new file mode 100644 index 00000000000000..c355388ed836f5 --- /dev/null +++ b/llvm/test/Transforms/Inline/AArch64/sve-alloca-merge.ll @@ -0,0 +1,29 @@ +; RUN: opt -mtriple=aarch64--linux-gnu -mattr=+sve < %s -inline -S | FileCheck %s + +define void @bar(* %a) { +entry: + %b = alloca , align 16 + store zeroinitializer, * %b, align 16 + %c = load , * %a, align 16 + %d = load , * %b, align 16 + %e = add %c, %d + %f = add %e, %c + store %f, * %a, align 16 + ret void +} + +define i64 @foo() { +; CHECK-LABEL: @foo( +; CHECK: %0 = bitcast * %{{.*}} to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* %0) +; CHECK: %1 = bitcast * %{{.*}} to i8* +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* %1) +entry: + %a = alloca , align 16 + store zeroinitializer, * %a, align 16 + %a1 = bitcast * %a to i64* + store i64 1, i64* %a1, align 8 + call void @bar(* %a) + %el = load i64, i64* %a1 + ret i64 %el +} diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index 3049570bd47b60..b55c429a9d02d7 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -313,6 +313,13 @@ struct LinalgTilingPattern : public LinalgBaseTilingPattern { PatternBenefit benefit = 1) : LinalgBaseTilingPattern(OpTy::getOperationName(), context, options, marker, benefit) {} + LogicalResult matchAndRewrite(Operation *op, + PatternRewriter &rewriter) const override { + if (failed(LinalgBaseTilingPattern::matchAndRewrite(op, rewriter))) + return failure(); + rewriter.eraseOp(op); + return success(); + } }; /// @@ -415,7 +422,8 @@ enum class LinalgLoweringType { AffineLoops = 2, ParallelLoops = 3 }; -template struct LinalgLoweringPattern : public RewritePattern { +template +struct LinalgLoweringPattern : public RewritePattern { LinalgLoweringPattern(MLIRContext *context, LinalgLoweringType loweringType, LinalgMarker marker = LinalgMarker(), PatternBenefit benefit = 1) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp index afac3d5f5f9a43..c1aad620fe08af 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -126,8 +126,6 @@ LogicalResult mlir::linalg::LinalgBaseTilingPattern::matchAndRewrite( // New marker if specified. marker.replaceLinalgMarker(rewriter, res->op.getOperation()); - - rewriter.eraseOp(op); return success(); }