diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp index 50301ad91b1d58..e88a6b1bffb064 100644 --- a/lld/COFF/DLL.cpp +++ b/lld/COFF/DLL.cpp @@ -19,6 +19,7 @@ #include "DLL.h" #include "Chunks.h" +#include "SymbolTable.h" #include "llvm/Object/COFF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Path.h" @@ -653,9 +654,18 @@ void DelayLoadContents::create(Defined *h) { auto *c = make<HintNameChunk>(extName, 0); names.push_back(make<LookupChunk>(c)); hintNames.push_back(c); + // Add a synthetic symbol for this load thunk, using the "__imp_load" + // prefix, in case this thunk needs to be added to the list of valid + // call targets for Control Flow Guard. + StringRef symName = saver.save("__imp_load_" + extName); + s->loadThunkSym = + cast<DefinedSynthetic>(symtab->addSynthetic(symName, t)); } } thunks.push_back(tm); + StringRef tmName = + saver.save("__tailMerge_" + syms[0]->getDLLName().lower()); + symtab->addSynthetic(tmName, tm); // Terminate with null values. addresses.push_back(make<NullChunk>(8)); names.push_back(make<NullChunk>(8)); diff --git a/lld/COFF/ICF.cpp b/lld/COFF/ICF.cpp index 1b33634b63d6af..386f861fb27fb7 100644 --- a/lld/COFF/ICF.cpp +++ b/lld/COFF/ICF.cpp @@ -131,7 +131,7 @@ bool ICF::assocEquals(const SectionChunk *a, const SectionChunk *b) { auto considerForICF = [](const SectionChunk &assoc) { StringRef Name = assoc.getSectionName(); return !(Name.startswith(".debug") || Name == ".gfids$y" || - Name == ".gljmp$y"); + Name == ".giats$y" || Name == ".gljmp$y"); }; auto ra = make_filter_range(a->children(), considerForICF); auto rb = make_filter_range(b->children(), considerForICF); diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp index aaa00d0f7279a8..37f66131620e69 100644 --- a/lld/COFF/InputFiles.cpp +++ b/lld/COFF/InputFiles.cpp @@ -280,6 +280,8 @@ SectionChunk *ObjFile::readSection(uint32_t sectionNumber, debugChunks.push_back(c); else if (name == ".gfids$y") guardFidChunks.push_back(c); + else if (name == ".giats$y") + guardIATChunks.push_back(c); else if (name == ".gljmp$y") guardLJmpChunks.push_back(c); else if (name == ".sxdata") diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h index 0a5114b165f0cb..26a6e5b7b70d96 100644 --- a/lld/COFF/InputFiles.h +++ b/lld/COFF/InputFiles.h @@ -144,6 +144,7 @@ class ObjFile : public InputFile { ArrayRef<SectionChunk *> getDebugChunks() { return debugChunks; } ArrayRef<SectionChunk *> getSXDataChunks() { return sxDataChunks; } ArrayRef<SectionChunk *> getGuardFidChunks() { return guardFidChunks; } + ArrayRef<SectionChunk *> getGuardIATChunks() { return guardIATChunks; } ArrayRef<SectionChunk *> getGuardLJmpChunks() { return guardLJmpChunks; } ArrayRef<Symbol *> getSymbols() { return symbols; } @@ -283,9 +284,11 @@ class ObjFile : public InputFile { // 32-bit x86. std::vector<SectionChunk *> sxDataChunks; - // Chunks containing symbol table indices of address taken symbols and longjmp - // targets. These are not linked into the final binary when /guard:cf is set. + // Chunks containing symbol table indices of address taken symbols, address + // taken IAT entries, and longjmp targets. These are not linked into the + // final binary when /guard:cf is set.
std::vector guardFidChunks; + std::vector guardIATChunks; std::vector guardLJmpChunks; // This vector contains a list of all symbols defined or referenced by this diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h index 1da4df3669662e..370f72745900d4 100644 --- a/lld/COFF/Symbols.h +++ b/lld/COFF/Symbols.h @@ -343,6 +343,13 @@ class DefinedImportData : public Defined { uint16_t getOrdinal() { return file->hdr->OrdinalHint; } ImportFile *file; + + // This is a pointer to the synthetic symbol associated with the load thunk + // for this symbol that will be called if the DLL is delay-loaded. This is + // needed for Control Flow Guard because if this DefinedImportData symbol is a + // valid call target, the corresponding load thunk must also be marked as a + // valid call target. + DefinedSynthetic *loadThunkSym; }; // This class represents a symbol for a jump table entry which jumps diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index d1081b008ea40e..b437a681483ff3 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -227,6 +227,9 @@ class Writer { void markSymbolsForRVATable(ObjFile *file, ArrayRef symIdxChunks, SymbolRVASet &tableSymbols); + void getSymbolsFromSections(ObjFile *file, + ArrayRef symIdxChunks, + std::vector &symbols); void maybeAddRVATable(SymbolRVASet tableSymbols, StringRef tableSym, StringRef countSym); void setSectionPermissions(); @@ -605,8 +608,9 @@ void Writer::run() { createImportTables(); createSections(); - createMiscChunks(); appendImportThunks(); + // Import thunks must be added before the Control Flow Guard tables are added. + createMiscChunks(); createExportTable(); mergeSections(); removeUnusedSections(); @@ -1618,6 +1622,8 @@ static void markSymbolsWithRelocations(ObjFile *file, // table. void Writer::createGuardCFTables() { SymbolRVASet addressTakenSyms; + SymbolRVASet giatsRVASet; + std::vector giatsSymbols; SymbolRVASet longJmpTargets; for (ObjFile *file : ObjFile::instances) { // If the object was compiled with /guard:cf, the address taken symbols @@ -1627,6 +1633,8 @@ void Writer::createGuardCFTables() { // possibly address-taken. if (file->hasGuardCF()) { markSymbolsForRVATable(file, file->getGuardFidChunks(), addressTakenSyms); + markSymbolsForRVATable(file, file->getGuardIATChunks(), giatsRVASet); + getSymbolsFromSections(file, file->getGuardIATChunks(), giatsSymbols); markSymbolsForRVATable(file, file->getGuardLJmpChunks(), longJmpTargets); } else { markSymbolsWithRelocations(file, addressTakenSyms); @@ -1641,6 +1649,16 @@ void Writer::createGuardCFTables() { for (Export &e : config->exports) maybeAddAddressTakenFunction(addressTakenSyms, e.sym); + // For each entry in the .giats table, check if it has a corresponding load + // thunk (e.g. because the DLL that defines it will be delay-loaded) and, if + // so, add the load thunk to the address taken (.gfids) table. + for (Symbol *s : giatsSymbols) { + if (auto *di = dyn_cast(s)) { + if (di->loadThunkSym) + addSymbolToRVASet(addressTakenSyms, di->loadThunkSym); + } + } + // Ensure sections referenced in the gfid table are 16-byte aligned. for (const ChunkAndOffset &c : addressTakenSyms) if (c.inputChunk->getAlignment() < 16) @@ -1649,6 +1667,10 @@ void Writer::createGuardCFTables() { maybeAddRVATable(std::move(addressTakenSyms), "__guard_fids_table", "__guard_fids_count"); + // Add the Guard Address Taken IAT Entry Table (.giats). + maybeAddRVATable(std::move(giatsRVASet), "__guard_iat_table", + "__guard_iat_count"); + // Add the longjmp target table unless the user told us not to. 
if (config->guardCF == GuardCFLevel::Full) maybeAddRVATable(std::move(longJmpTargets), "__guard_longjmp_table", @@ -1665,11 +1687,11 @@ void Writer::createGuardCFTables() { } // Take a list of input sections containing symbol table indices and add those -// symbols to an RVA table. The challenge is that symbol RVAs are not known and +// symbols to a vector. The challenge is that symbol RVAs are not known and // depend on the table size, so we can't directly build a set of integers. -void Writer::markSymbolsForRVATable(ObjFile *file, +void Writer::getSymbolsFromSections(ObjFile *file, ArrayRef symIdxChunks, - SymbolRVASet &tableSymbols) { + std::vector &symbols) { for (SectionChunk *c : symIdxChunks) { // Skip sections discarded by linker GC. This comes up when a .gfids section // is associated with something like a vtable and the vtable is discarded. @@ -1687,7 +1709,7 @@ void Writer::markSymbolsForRVATable(ObjFile *file, } // Read each symbol table index and check if that symbol was included in the - // final link. If so, add it to the table symbol set. + // final link. If so, add it to the vector of symbols. ArrayRef symIndices( reinterpret_cast(data.data()), data.size() / 4); ArrayRef objSymbols = file->getSymbols(); @@ -1699,12 +1721,24 @@ void Writer::markSymbolsForRVATable(ObjFile *file, } if (Symbol *s = objSymbols[symIndex]) { if (s->isLive()) - addSymbolToRVASet(tableSymbols, cast(s)); + symbols.push_back(cast(s)); } } } } +// Take a list of input sections containing symbol table indices and add those +// symbols to an RVA table. +void Writer::markSymbolsForRVATable(ObjFile *file, + ArrayRef symIdxChunks, + SymbolRVASet &tableSymbols) { + std::vector syms; + getSymbolsFromSections(file, symIdxChunks, syms); + + for (Symbol *s : syms) + addSymbolToRVASet(tableSymbols, cast(s)); +} + // Replace the absolute table symbol with a synthetic symbol pointing to // tableChunk so that we can emit base relocations for it and resolve section // relative relocations. diff --git a/lld/test/COFF/giats.s b/lld/test/COFF/giats.s new file mode 100644 index 00000000000000..f18720f3692faa --- /dev/null +++ b/lld/test/COFF/giats.s @@ -0,0 +1,117 @@ +# REQUIRES: x86 + +# Make a DLL that exports exportfn1. +# RUN: yaml2obj %p/Inputs/export.yaml -o %basename_t-exp.obj +# RUN: lld-link /out:%basename_t-exp.dll /dll %basename_t-exp.obj /export:exportfn1 /implib:%basename_t-exp.lib + +# Make an object file that imports exportfn1. +# RUN: llvm-mc -triple x86_64-windows-msvc %s -filetype=obj -o %basename_t.obj + +# Check that the Guard address-taken IAT entry tables are propagated to the final executable. +# RUN: lld-link %basename_t.obj -guard:cf -entry:main -out:%basename_t-nodelay.exe %basename_t-exp.lib +# RUN: llvm-readobj --file-headers --coff-load-config %basename_t-nodelay.exe | FileCheck %s --check-prefix CHECK + +# CHECK: ImageBase: 0x140000000 +# CHECK: LoadConfig [ +# CHECK: GuardCFFunctionTable: 0x140002114 +# CHECK: GuardCFFunctionCount: 1 +# CHECK: GuardFlags: 0x10500 +# CHECK: GuardAddressTakenIatEntryTable: 0x140002118 +# CHECK: GuardAddressTakenIatEntryCount: 1 +# CHECK: ] +# CHECK: GuardFidTable [ +# CHECK-NEXT: 0x14000{{.*}} +# CHECK-NEXT: ] +# CHECK: GuardIatTable [ +# CHECK-NEXT: 0x14000{{.*}} +# CHECK-NEXT: ] + + +# Check that the additional load thunk symbol is added to the GFIDs table. 
+# RUN: lld-link %basename_t.obj -guard:cf -entry:main -out:%basename_t-delay.exe %basename_t-exp.lib -alternatename:__delayLoadHelper2=main -delayload:%basename_t-exp.dll +# RUN: llvm-readobj --file-headers --coff-load-config %basename_t-delay.exe | FileCheck %s --check-prefix DELAY-CHECK + +# DELAY-CHECK: ImageBase: 0x140000000 +# DELAY-CHECK: LoadConfig [ +# DELAY-CHECK: GuardCFFunctionTable: 0x140002114 +# DELAY-CHECK: GuardCFFunctionCount: 2 +# DELAY-CHECK: GuardFlags: 0x10500 +# DELAY-CHECK: GuardAddressTakenIatEntryTable: 0x14000211C +# DELAY-CHECK: GuardAddressTakenIatEntryCount: 1 +# DELAY-CHECK: ] +# DELAY-CHECK: GuardFidTable [ +# DELAY-CHECK-NEXT: 0x14000{{.*}} +# DELAY-CHECK-NEXT: 0x14000{{.*}} +# DELAY-CHECK-NEXT: ] +# DELAY-CHECK: GuardIatTable [ +# DELAY-CHECK-NEXT: 0x14000{{.*}} +# DELAY-CHECK-NEXT: ] + + +# This assembly is reduced from C code like: +# __declspec(noinline) +# void IndirectCall(BOOL (func)(HANDLE)) { +# (*func)(NULL); +# } +# int main(int argc, char** argv) { +# IndirectCall(exportfn1); +# } + + .text + .def @feat.00; + .scl 3; + .type 0; + .endef + .globl @feat.00 +.set @feat.00, 2048 + .def IndirectCall; .scl 2; .type 32; .endef + .globl IndirectCall # -- Begin function IndirectCall + .p2align 4, 0x90 +IndirectCall: # @IndirectCall +# %bb.0: + subq $40, %rsp + movq %rcx, 32(%rsp) + movq 32(%rsp), %rax + movq %rax, %rdx # This would otherwise have be: movq __guard_dispatch_icall_fptr(%rip), %rdx + xorl %ecx, %ecx + callq *%rdx + nop + addq $40, %rsp + retq + # -- End function + .def main; .scl 2; .type 32; .endef + .globl main # -- Begin function main + .p2align 4, 0x90 +main: # @main +# %bb.0: + subq $56, %rsp + movq __imp_exportfn1(%rip), %rax + movq %rdx, 48(%rsp) + movl %ecx, 44(%rsp) + movq %rax, %rcx + callq IndirectCall + xorl %eax, %eax + addq $56, %rsp + retq + # -- End function + .section .gfids$y,"dr" + .section .giats$y,"dr" + .symidx __imp_exportfn1 + .section .gljmp$y,"dr" + +# Load configuration directory entry (winnt.h _IMAGE_LOAD_CONFIG_DIRECTORY64). +# The linker will define the __guard_* symbols. + .section .rdata,"dr" +.globl _load_config_used +_load_config_used: + .long 256 + .fill 124, 1, 0 + .quad __guard_fids_table + .quad __guard_fids_count + .long __guard_flags + .fill 12, 1, 0 + .quad __guard_iat_table + .quad __guard_iat_count + .quad __guard_longjmp_table + .quad __guard_fids_count + .fill 84, 1, 0 \ No newline at end of file diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index febca473776aa6..158257a5aa9a10 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1677,23 +1677,30 @@ class ScalarEvolution { getPredecessorWithUniqueSuccessorForBB(const BasicBlock *BB) const; /// Test whether the condition described by Pred, LHS, and RHS is true - /// whenever the given FoundCondValue value evaluates to true. + /// whenever the given FoundCondValue value evaluates to true in given + /// Context. If Context is nullptr, then the found predicate is true + /// everywhere. bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, - const Value *FoundCondValue, bool Inverse); + const Value *FoundCondValue, bool Inverse, + const Instruction *Context = nullptr); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by FoundPred, FoundLHS, FoundRHS is - /// true. + /// true in given Context. 
If Context is nullptr, then the found predicate is + /// true everywhere. bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, - const SCEV *FoundRHS); + const SCEV *FoundRHS, + const Instruction *Context = nullptr); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is - /// true. + /// true in given Context. If Context is nullptr, then the found predicate is + /// true everywhere. bool isImpliedCondOperands(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, - const SCEV *FoundRHS); + const SCEV *FoundRHS, + const Instruction *Context = nullptr); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is @@ -1740,6 +1747,18 @@ const SCEV *FoundLHS, const SCEV *FoundRHS); + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the condition described by Pred, FoundLHS, and FoundRHS is + /// true. + /// + /// This routine tries to weaken the known condition based on the fact that + /// FoundLHS is an AddRec. + bool isImpliedCondOperandsViaAddRecStart(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, + const SCEV *FoundRHS, + const Instruction *Context); + /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is /// true. diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h index 8c6bcba2332b1a..316086833d9756 100644 --- a/llvm/include/llvm/MC/MCObjectFileInfo.h +++ b/llvm/include/llvm/MC/MCObjectFileInfo.h @@ -215,6 +215,7 @@ class MCObjectFileInfo { MCSection *XDataSection = nullptr; MCSection *SXDataSection = nullptr; MCSection *GFIDsSection = nullptr; + MCSection *GIATsSection = nullptr; MCSection *GLJMPSection = nullptr; // XCOFF specific sections @@ -398,6 +399,7 @@ class MCObjectFileInfo { MCSection *getXDataSection() const { return XDataSection; } MCSection *getSXDataSection() const { return SXDataSection; } MCSection *getGFIDsSection() const { return GFIDsSection; } + MCSection *getGIATsSection() const { return GIATsSection; } MCSection *getGLJMPSection() const { return GLJMPSection; } // XCOFF specific sections diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index e51b31673105cf..a3e454fefcf0fd 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -9549,15 +9549,16 @@ bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB, // Try to prove (Pred, LHS, RHS) using isImpliedCond.
auto ProveViaCond = [&](const Value *Condition, bool Inverse) { - if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse)) + const Instruction *Context = &BB->front(); + if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse, Context)) return true; if (ProvingStrictComparison) { if (!ProvedNonStrictComparison) - ProvedNonStrictComparison = - isImpliedCond(NonStrictPredicate, LHS, RHS, Condition, Inverse); + ProvedNonStrictComparison = isImpliedCond(NonStrictPredicate, LHS, RHS, + Condition, Inverse, Context); if (!ProvedNonEquality) - ProvedNonEquality = - isImpliedCond(ICmpInst::ICMP_NE, LHS, RHS, Condition, Inverse); + ProvedNonEquality = isImpliedCond(ICmpInst::ICMP_NE, LHS, RHS, + Condition, Inverse, Context); if (ProvedNonStrictComparison && ProvedNonEquality) return true; } @@ -9623,7 +9624,8 @@ bool ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, - const Value *FoundCondValue, bool Inverse) { + const Value *FoundCondValue, bool Inverse, + const Instruction *Context) { if (!PendingLoopPredicates.insert(FoundCondValue).second) return false; @@ -9634,12 +9636,16 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, if (const BinaryOperator *BO = dyn_cast(FoundCondValue)) { if (BO->getOpcode() == Instruction::And) { if (!Inverse) - return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) || - isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse); + return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse, + Context) || + isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse, + Context); } else if (BO->getOpcode() == Instruction::Or) { if (Inverse) - return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) || - isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse); + return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse, + Context) || + isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse, + Context); } } @@ -9657,14 +9663,14 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *FoundLHS = getSCEV(ICI->getOperand(0)); const SCEV *FoundRHS = getSCEV(ICI->getOperand(1)); - return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS); + return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS, Context); } bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, ICmpInst::Predicate FoundPred, - const SCEV *FoundLHS, - const SCEV *FoundRHS) { + const SCEV *FoundLHS, const SCEV *FoundRHS, + const Instruction *Context) { // Balance the types. if (getTypeSizeInBits(LHS->getType()) < getTypeSizeInBits(FoundLHS->getType())) { @@ -9708,16 +9714,16 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, // Check whether the found predicate is the same as the desired predicate. if (FoundPred == Pred) - return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS); + return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context); // Check whether swapping the found predicate makes it the same as the // desired predicate. 
if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) { if (isa(RHS)) - return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS); + return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS, Context); else - return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred), - RHS, LHS, FoundLHS, FoundRHS); + return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred), RHS, + LHS, FoundLHS, FoundRHS, Context); } // Unsigned comparison is the same as signed comparison when both the operands @@ -9725,7 +9731,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, if (CmpInst::isUnsigned(FoundPred) && CmpInst::getSignedPredicate(FoundPred) == Pred && isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS)) - return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS); + return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context); // Check if we can make progress by sharpening ranges. if (FoundPred == ICmpInst::ICMP_NE && @@ -9762,8 +9768,8 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, case ICmpInst::ICMP_UGE: // We know V `Pred` SharperMin. If this implies LHS `Pred` // RHS, we're done. - if (isImpliedCondOperands(Pred, LHS, RHS, V, - getConstant(SharperMin))) + if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(SharperMin), + Context)) return true; LLVM_FALLTHROUGH; @@ -9778,7 +9784,8 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, // // If V `Pred` Min implies LHS `Pred` RHS, we're done. - if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min))) + if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min), + Context)) return true; break; @@ -9786,14 +9793,14 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, case ICmpInst::ICMP_SLE: case ICmpInst::ICMP_ULE: if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS, - LHS, V, getConstant(SharperMin))) + LHS, V, getConstant(SharperMin), Context)) return true; LLVM_FALLTHROUGH; case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_ULT: if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS, - LHS, V, getConstant(Min))) + LHS, V, getConstant(Min), Context)) return true; break; @@ -9807,11 +9814,12 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, // Check whether the actual condition is beyond sufficient. if (FoundPred == ICmpInst::ICMP_EQ) if (ICmpInst::isTrueWhenEqual(Pred)) - if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS)) + if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context)) return true; if (Pred == ICmpInst::ICMP_NE) if (!ICmpInst::isTrueWhenEqual(FoundPred)) - if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS)) + if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS, + Context)) return true; // Otherwise assume the worst. @@ -9890,6 +9898,44 @@ Optional ScalarEvolution::computeConstantDifference(const SCEV *More, return None; } +bool ScalarEvolution::isImpliedCondOperandsViaAddRecStart( + ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, const SCEV *FoundRHS, const Instruction *Context) { + // Try to recognize the following pattern: + // + // FoundRHS = ... + // ... 
+ // loop: + // FoundLHS = {Start,+,W} + // context_bb: // Basic block from the same loop + // known(Pred, FoundLHS, FoundRHS) + // + // If some predicate is known in the context of a loop, it is also known on + // each iteration of this loop, including the first iteration. Therefore, in + // this case, `FoundLHS Pred FoundRHS` implies `Start Pred FoundRHS`. Try to + // prove the original pred using this fact. + if (!Context) + return false; + // Make sure AR varies in the context block. + if (auto *AR = dyn_cast(FoundLHS)) { + if (!AR->getLoop()->contains(Context->getParent())) + return false; + if (!isAvailableAtLoopEntry(FoundRHS, AR->getLoop())) + return false; + return isImpliedCondOperands(Pred, LHS, RHS, AR->getStart(), FoundRHS); + } + + if (auto *AR = dyn_cast(FoundRHS)) { + if (!AR->getLoop()->contains(Context)) + return false; + if (!isAvailableAtLoopEntry(FoundLHS, AR->getLoop())) + return false; + return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, AR->getStart()); + } + + return false; +} + bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow( ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS) { @@ -10080,13 +10126,18 @@ bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred, bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, - const SCEV *FoundRHS) { + const SCEV *FoundRHS, + const Instruction *Context) { if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS)) return true; if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS)) return true; + if (isImpliedCondOperandsViaAddRecStart(Pred, LHS, RHS, FoundLHS, FoundRHS, + Context)) + return true; + return isImpliedCondOperandsHelper(Pred, LHS, RHS, FoundLHS, FoundRHS) || // ~x < ~y --> x > y diff --git a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp index 914308d9147e2d..09bcf5cb25a215 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // This file contains support for writing the metadata for Windows Control Flow -// Guard, including address-taken functions, and valid longjmp targets. +// Guard, including address-taken functions and valid longjmp targets. // //===----------------------------------------------------------------------===// @@ -17,8 +17,8 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCStreamer.h" @@ -78,20 +78,49 @@ static bool isPossibleIndirectCallTarget(const Function *F) { return false; } +/// Returns true if this function should be added to the Guard Address Taken IAT +/// Entry Table (GIATs) instead of the Guard Function ID Table (GFIDs). 
+static bool isIATAddressTaken(const Function *F) { + if (F->hasDLLImportStorageClass()) { + return true; + } + return false; +} + void WinCFGuard::endModule() { const Module *M = Asm->MMI->getModule(); - std::vector Functions; - for (const Function &F : *M) - if (isPossibleIndirectCallTarget(&F)) - Functions.push_back(&F); - if (Functions.empty() && LongjmpTargets.empty()) + std::vector GFIDsEntries; + std::vector GIATsEntries; + for (const Function &F : *M) { + if (isPossibleIndirectCallTarget(&F)) { + if (isIATAddressTaken(&F)) { + // If the possible call target is reached via the IAT, add it to the + // GIATs table instead of the GFIDs table. + GIATsEntries.push_back(&F); + } else { + // Otherwise add it to the GFIDs table. + GFIDsEntries.push_back(&F); + } + } + } + + if (GFIDsEntries.empty() && GIATsEntries.empty() && LongjmpTargets.empty()) return; + + // Emit the symbol index of each GFIDs entry to form the GFIDs table. auto &OS = *Asm->OutStreamer; OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGFIDsSection()); - for (const Function *F : Functions) + for (const Function *F : GFIDsEntries) OS.EmitCOFFSymbolIndex(Asm->getSymbol(F)); - // Emit the symbol index of each longjmp target. + // Emit the symbol index of each GIATs entry to form the GIATs table. + OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGIATsSection()); + for (const Function *F : GIATsEntries) { + OS.EmitCOFFSymbolIndex(Asm->OutContext.getOrCreateSymbol( + Twine("__imp_") + Asm->getSymbol(F)->getName())); + } + + // Emit the symbol index of each longjmp target to form the GLJMP table. OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGLJMPSection()); for (const MCSymbol *S : LongjmpTargets) { OS.EmitCOFFSymbolIndex(S); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 356eb1ce0964b1..0b3edc3416859d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2715,7 +2715,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { EVT InVT = Lo.getValueType(); EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), - InVT.getVectorNumElements()); + InVT.getVectorElementCount()); if (N->isStrictFPOpcode()) { Lo = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other }, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index b9362f1e762d3e..eef467d116b7ff 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4613,8 +4613,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, Operand.getValueType().isFloatingPoint() && "Invalid FP cast!"); if (Operand.getValueType() == VT) return Operand; // noop conversion. 
assert((!VT.isVector() || - VT.getVectorNumElements() == - Operand.getValueType().getVectorNumElements()) && + VT.getVectorElementCount() == + Operand.getValueType().getVectorElementCount()) && "Vector element count mismatch!"); assert(Operand.getValueType().bitsLT(VT) && "Invalid fpext node, dst < src!"); diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp index ae7345c4e05b91..eec2615974b578 100644 --- a/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/llvm/lib/MC/MCObjectFileInfo.cpp @@ -752,6 +752,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) { COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); + GIATsSection = Ctx->getCOFFSection(".giats$y", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + GLJMPSection = Ctx->getCOFFSection(".gljmp$y", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 0c8da4e20d7d05..fb70b2d801da09 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -183,6 +183,8 @@ static bool isMergePassthruOpcode(unsigned Opc) { case AArch64ISD::FROUND_MERGE_PASSTHRU: case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU: case AArch64ISD::FTRUNC_MERGE_PASSTHRU: + case AArch64ISD::FP_ROUND_MERGE_PASSTHRU: + case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU: case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU: case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU: case AArch64ISD::FCVTZU_MERGE_PASSTHRU: @@ -990,7 +992,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // FIXME: Add custom lowering of MLOAD to handle different passthrus (not a // splat of 0 or undef) once vector selects supported in SVE codegen. See // D68877 for more details. 
- for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) { setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); setOperationAction(ISD::UINT_TO_FP, VT, Custom); @@ -1018,7 +1019,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, } for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) { - setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); + setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); setOperationAction(ISD::SELECT, VT, Custom); setOperationAction(ISD::SETCC, VT, Custom); setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); @@ -1035,6 +1036,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv2f64}) { + setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); setOperationAction(ISD::SELECT, VT, Custom); @@ -1052,6 +1054,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FROUNDEVEN, VT, Custom); setOperationAction(ISD::FTRUNC, VT, Custom); setOperationAction(ISD::FSQRT, VT, Custom); + setOperationAction(ISD::FP_EXTEND, VT, Custom); + setOperationAction(ISD::FP_ROUND, VT, Custom); } setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom); @@ -1580,6 +1584,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU) @@ -2908,6 +2914,9 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) { SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { + if (Op.getValueType().isScalableVector()) + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU); + assert(Op.getValueType() == MVT::f128 && "Unexpected lowering"); RTLIB::Libcall LC; @@ -2918,6 +2927,9 @@ SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { + if (Op.getValueType().isScalableVector()) + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU); + bool IsStrict = Op->isStrictFPOpcode(); SDValue SrcVal = Op.getOperand(IsStrict ? 
1 : 0); EVT SrcVT = SrcVal.getValueType(); @@ -3835,6 +3847,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, return LowerRETURNADDR(Op, DAG); case ISD::ADDROFRETURNADDR: return LowerADDROFRETURNADDR(Op, DAG); + case ISD::CONCAT_VECTORS: + return LowerCONCAT_VECTORS(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: @@ -9150,6 +9164,18 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, return SDValue(); } +SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op, + SelectionDAG &DAG) const { + assert(Op.getValueType().isScalableVector() && + isTypeLegal(Op.getValueType()) && + "Expected legal scalable vector type!"); + + if (isTypeLegal(Op.getOperand(0).getValueType()) && Op.getNumOperands() == 2) + return Op; + + return SDValue(); +} + SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!"); @@ -15989,7 +16015,8 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op, SmallVector Operands = {Pg}; for (const SDValue &V : Op->op_values()) { - assert((isa(V) || V.getValueType().isScalableVector()) && + assert((!V.getValueType().isVector() || + V.getValueType().isScalableVector()) && "Only scalable vectors are supported!"); Operands.push_back(V); } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 51391d309b4045..1b8f62e427dbb8 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -105,6 +105,8 @@ enum NodeType : unsigned { FROUNDEVEN_MERGE_PASSTHRU, FSQRT_MERGE_PASSTHRU, FTRUNC_MERGE_PASSTHRU, + FP_ROUND_MERGE_PASSTHRU, + FP_EXTEND_MERGE_PASSTHRU, UINT_TO_FP_MERGE_PASSTHRU, SINT_TO_FP_MERGE_PASSTHRU, FCVTZU_MERGE_PASSTHRU, diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 61155087cbe280..68dc477567a5d8 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -914,6 +914,13 @@ def imm0_1 : Operand, ImmLeaf, TImmLeaf { + let ParserMatchClass = Imm0_1Operand; +} + // imm0_15 predicate - True if the immediate is in the range [0,15] def imm0_15 : Operand, ImmLeaf ]>; +def SDT_AArch64FCVTR : SDTypeProfile<1, 4, [ + SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVec<4>, + SDTCVecEltisVT<1,i1> +]>; + +def AArch64fcvtr_mt : SDNode<"AArch64ISD::FP_ROUND_MERGE_PASSTHRU", SDT_AArch64FCVTR>; +def AArch64fcvte_mt : SDNode<"AArch64ISD::FP_EXTEND_MERGE_PASSTHRU", SDT_AArch64FCVT>; def AArch64ucvtf_mt : SDNode<"AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU", SDT_AArch64FCVT>; def AArch64scvtf_mt : SDNode<"AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU", SDT_AArch64FCVT>; def AArch64fcvtzu_mt : SDNode<"AArch64ISD::FCVTZU_MERGE_PASSTHRU", SDT_AArch64FCVT>; @@ -1178,6 +1185,11 @@ multiclass sve_prefetch; // Extract subvectors from FP SVE vectors + def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 0))), + (UUNPKLO_ZZ_D ZPR:$Zs)>; + def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 2))), + (UUNPKHI_ZZ_D ZPR:$Zs)>; + def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 0))), (UUNPKLO_ZZ_S ZPR:$Zs)>; def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 4))), @@ -1195,6 +1207,14 @@ multiclass sve_prefetch; + // Concatenate two floating point vectors. 
+ def : Pat<(nxv4f16 (concat_vectors nxv2f16:$v1, nxv2f16:$v2)), + (UZP1_ZZZ_S $v1, $v2)>; + def : Pat<(nxv8f16 (concat_vectors nxv4f16:$v1, nxv4f16:$v2)), + (UZP1_ZZZ_H $v1, $v2)>; + def : Pat<(nxv4f32 (concat_vectors nxv2f32:$v1, nxv2f32:$v2)), + (UZP1_ZZZ_S $v1, $v2)>; + defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", SETUGE, SETULE>; defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", SETUGT, SETULT>; defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", SETGE, SETLE>; @@ -1392,40 +1412,48 @@ multiclass sve_prefetch; defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl", int_aarch64_sve_lsl_wide>; - defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, null_frag, nxv8f16, nxv4i1, nxv4f32, ElementSizeS>; - defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, null_frag, nxv4f32, nxv4i1, nxv8f16, ElementSizeS>; - defm SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, null_frag, AArch64scvtf_mt, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; - defm SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, null_frag, AArch64scvtf_mt, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; - defm UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, null_frag, AArch64ucvtf_mt, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; - defm UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, null_frag, AArch64ucvtf_mt, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; - defm FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, null_frag, AArch64fcvtzs_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; - defm FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, null_frag, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; - defm FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, null_frag, AArch64fcvtzu_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; - defm FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, null_frag, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; - defm FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, int_aarch64_sve_fcvt_f16f64, null_frag, nxv8f16, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, null_frag, nxv2f64, nxv2i1, nxv8f16, ElementSizeD>; - defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, null_frag, nxv4f32, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, null_frag, nxv2f64, nxv2i1, nxv4f32, ElementSizeD>; - defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; - defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; - defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, AArch64ucvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>; - defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, AArch64scvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>; - defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, AArch64scvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>; - defm SCVTF_ZPmZ_DtoH : 
sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, int_aarch64_sve_scvtf_f16i64, AArch64scvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>; - defm UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, int_aarch64_sve_ucvtf_f32i64, AArch64ucvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>; - defm UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, AArch64ucvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>; - defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, null_frag, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; - defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, null_frag, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; - defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>; - defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>; - defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>; - defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>; - defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>; - defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>; - defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, null_frag, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; - defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, null_frag, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zdr<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, AArch64fcvtr_mt, nxv4f16, nxv4i1, nxv4f32, ElementSizeS>; + defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, AArch64fcvte_mt, nxv4f32, nxv4i1, nxv4f16, ElementSizeS>; + defm SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0110010, "scvtf", ZPR16, ZPR16, null_frag, AArch64scvtf_mt, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; + defm SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1010100, "scvtf", ZPR32, ZPR32, null_frag, AArch64scvtf_mt, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; + defm UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1010101, "ucvtf", ZPR32, ZPR32, null_frag, AArch64ucvtf_mt, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; + defm UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0110011, "ucvtf", ZPR16, ZPR16, null_frag, AArch64ucvtf_mt, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; + defm FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0111010, "fcvtzs", ZPR16, ZPR16, null_frag, AArch64fcvtzs_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; + defm FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1011100, "fcvtzs", ZPR32, ZPR32, null_frag, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f32, 
ElementSizeS>; + defm FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd< 0b0111011, "fcvtzu", ZPR16, ZPR16, null_frag, AArch64fcvtzu_mt, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; + defm FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd< 0b1011101, "fcvtzu", ZPR32, ZPR32, null_frag, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; + defm FCVT_ZPmZ_DtoH : sve_fp_2op_p_zdr<0b1101000, "fcvt", ZPR64, ZPR16, int_aarch64_sve_fcvt_f16f64, AArch64fcvtr_mt, nxv2f16, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f16, ElementSizeD>; + defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zdr<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, AArch64fcvtr_mt, nxv2f32, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, AArch64fcvte_mt, nxv2f64, nxv2i1, nxv2f32, ElementSizeD>; + defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; + defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv4i32, ElementSizeD>; + defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110101, "ucvtf", ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, AArch64ucvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>; + defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, AArch64scvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>; + defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd< 0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, AArch64scvtf_mt, nxv4f16, nxv4i1, nxv4i32, ElementSizeS>; + defm SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd< 0b0110110, "scvtf", ZPR64, ZPR16, int_aarch64_sve_scvtf_f16i64, AArch64scvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>; + defm UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1110101, "ucvtf", ZPR64, ZPR32, int_aarch64_sve_ucvtf_f32i64, AArch64ucvtf_mt, nxv2f32, nxv2i1, nxv2i64, ElementSizeD>; + defm UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd< 0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, AArch64ucvtf_mt, nxv2f16, nxv2i1, nxv2i64, ElementSizeD>; + defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1110110, "scvtf", ZPR64, ZPR64, null_frag, AArch64scvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; + defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1110111, "ucvtf", ZPR64, ZPR64, null_frag, AArch64ucvtf_mt, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; + defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd< 0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, null_frag, nxv4i32, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>; + defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, AArch64fcvtzs_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>; + defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>; + defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd< 0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, AArch64fcvtzu_mt, nxv4i32, nxv4i1, nxv4f16, ElementSizeS>; + 
defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd< 0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f16, ElementSizeD>; + defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd< 0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f32, ElementSizeD>; + defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111110, "fcvtzs", ZPR64, ZPR64, null_frag, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111111, "fcvtzu", ZPR64, ZPR64, null_frag, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; + + def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 PPR:$Pg), (nxv2f16 ZPR:$Zs), (nxv2f32 ZPR:$Zd))), + (FCVT_ZPmZ_HtoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + + // FP_ROUND has an additional 'precise' flag which indicates the type of rounding. + // This is ignored by the pattern below where it is matched by (i64 timm0_1) + def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 PPR:$Pg), (nxv2f32 ZPR:$Zs), (i64 timm0_1), (nxv2f16 ZPR:$Zd))), + (FCVT_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; // Floating-point -> signed integer def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg), diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index d0226a73d87d2d..45a712c897a441 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -318,6 +318,13 @@ class SVE_1_Op_Passthru_Pat; +// Used to match FP_ROUND_MERGE_PASSTHRU, which has an additional flag for the +// type of rounding. This is matched by timm0_1 in pattern below and ignored. +class SVE_1_Op_Passthru_Round_Pat +: Pat<(vtd (op pg:$Op1, vts:$Op2, (i64 timm0_1), vtd:$Op3)), + (inst $Op3, $Op1, $Op2)>; + class SVE_1_Op_Imm_OptLsl_Reverse_Pat : Pat<(vt (op (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))), (vt zprty:$Op1))), @@ -2299,6 +2306,25 @@ multiclass sve_fp_2op_p_zd opc, string asm, def : SVE_1_Op_Passthru_Pat(NAME)>; } +multiclass sve_fp_2op_p_zdr opc, string asm, + RegisterOperand i_zprtype, + RegisterOperand o_zprtype, + SDPatternOperator int_op, + SDPatternOperator ir_op, ValueType vt1, + ValueType vt2, ValueType vt3, ElementSizeEnum Sz> { + def NAME : sve_fp_2op_p_zd; + + // convert vt1 to a packed type for the intrinsic patterns + defvar packedvt1 = !cond(!eq(!cast(vt1), "nxv2f16"): nxv8f16, + !eq(!cast(vt1), "nxv4f16"): nxv8f16, + !eq(!cast(vt1), "nxv2f32"): nxv4f32, + 1 : vt1); + + def : SVE_3_Op_Pat(NAME)>; + + def : SVE_1_Op_Passthru_Round_Pat(NAME)>; +} + multiclass sve_fp_2op_p_zd_HSD opc, string asm, SDPatternOperator op> { def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>; def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>; diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index cd9c38752ad237..ac787a1674ab7a 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -644,47 +644,10 @@ bool LowOverheadLoop::ValidateTailPredicate() { return false; } - // The element count register maybe defined after InsertPt, in which case we - // need to try to move either InsertPt or the def so that the [w|d]lstp can - // use the value. 
- - if (StartInsertPt != StartInsertBB->end() && - !RDA.isReachingDefLiveOut(&*StartInsertPt, NumElements)) { - if (auto *ElemDef = RDA.getLocalLiveOutMIDef(StartInsertBB, NumElements)) { - if (RDA.isSafeToMoveForwards(ElemDef, &*StartInsertPt)) { - ElemDef->removeFromParent(); - StartInsertBB->insert(StartInsertPt, ElemDef); - LLVM_DEBUG(dbgs() << "ARM Loops: Moved element count def: " - << *ElemDef); - } else if (RDA.isSafeToMoveBackwards(&*StartInsertPt, ElemDef)) { - StartInsertPt->removeFromParent(); - StartInsertBB->insertAfter(MachineBasicBlock::iterator(ElemDef), - &*StartInsertPt); - LLVM_DEBUG(dbgs() << "ARM Loops: Moved start past: " << *ElemDef); - } else { - // If we fail to move an instruction and the element count is provided - // by a mov, use the mov operand if it will have the same value at the - // insertion point - MachineOperand Operand = ElemDef->getOperand(1); - if (isMovRegOpcode(ElemDef->getOpcode()) && - RDA.getUniqueReachingMIDef(ElemDef, Operand.getReg()) == - RDA.getUniqueReachingMIDef(&*StartInsertPt, Operand.getReg())) { - TPNumElements = Operand; - NumElements = TPNumElements.getReg(); - } else { - LLVM_DEBUG(dbgs() - << "ARM Loops: Unable to move element count to loop " - << "start instruction.\n"); - return false; - } - } - } - } - // Could inserting the [W|D]LSTP cause some unintended affects? In a perfect // world the [w|d]lstp instruction would be last instruction in the preheader // and so it would only affect instructions within the loop body. But due to - // scheduling, and/or the logic in this pass (above), the insertion point can + // scheduling, and/or the logic in this pass, the insertion point can // be moved earlier. So if the Loop Start isn't the last instruction in the // preheader, and if the initial element count is smaller than the vector // width, the Loop Start instruction will immediately generate one or more @@ -1091,12 +1054,36 @@ void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) { return true; }; + // We know that we can define safely LR at InsertPt, but maybe we could + // push the insertion point to later on in the basic block. + auto TryAdjustInsertionPoint = [](MachineBasicBlock::iterator &InsertPt, + MachineInstr *Start, + ReachingDefAnalysis &RDA) { + + MachineBasicBlock *MBB = InsertPt->getParent(); + MachineBasicBlock::iterator FirstNonTerminator = + MBB->getFirstTerminator(); + unsigned CountReg = Start->getOperand(0).getReg(); + + // Get the latest possible insertion point and check whether the semantics + // will be maintained if Start was inserted there. 
+ if (FirstNonTerminator == MBB->end()) { + if (RDA.isReachingDefLiveOut(Start, CountReg) && + RDA.isReachingDefLiveOut(Start, ARM::LR)) + InsertPt = FirstNonTerminator; + } else if (RDA.hasSameReachingDef(Start, &*FirstNonTerminator, CountReg) && + RDA.hasSameReachingDef(Start, &*FirstNonTerminator, ARM::LR)) + InsertPt = FirstNonTerminator; + }; + if (!FindStartInsertionPoint(Start, Dec, StartInsertPt, StartInsertBB, RDA, ToRemove)) { LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n"); Revert = true; return; } + + TryAdjustInsertionPoint(StartInsertPt, Start, RDA); Revert = !ValidateRanges(Start, End, BBUtils, ML); CannotTailPredicate = !ValidateTailPredicate(); } diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 0dacb266a063d2..550745673bd9f3 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -2872,10 +2872,10 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, // If this is a logical shift by a constant, recurse then shift the result. if (I->isLogicalShift() && isa(I->getOperand(1))) { - unsigned BitShift = - cast(I->getOperand(1))->getLimitedValue(~0U); + const APInt &BitShift = cast(I->getOperand(1))->getValue(); + // Ensure the shift amount is defined. - if (BitShift > BitWidth) + if (BitShift.uge(BitWidth)) return Result; const auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, @@ -2887,11 +2887,11 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, // Perform the "shift" on BitProvenance. auto &P = Result->Provenance; if (I->getOpcode() == Instruction::Shl) { - P.erase(std::prev(P.end(), BitShift), P.end()); - P.insert(P.begin(), BitShift, BitPart::Unset); + P.erase(std::prev(P.end(), BitShift.getZExtValue()), P.end()); + P.insert(P.begin(), BitShift.getZExtValue(), BitPart::Unset); } else { - P.erase(P.begin(), std::next(P.begin(), BitShift)); - P.insert(P.end(), BitShift, BitPart::Unset); + P.erase(P.begin(), std::next(P.begin(), BitShift.getZExtValue())); + P.insert(P.end(), BitShift.getZExtValue(), BitPart::Unset); } return Result; diff --git a/llvm/test/CodeGen/AArch64/sve-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-fcvt.ll index 9b980ac25c108c..1b395806755d12 100644 --- a/llvm/test/CodeGen/AArch64/sve-fcvt.ll +++ b/llvm/test/CodeGen/AArch64/sve-fcvt.ll @@ -5,6 +5,94 @@ ; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning +; +; FP_EXTEND +; + +define @fcvts_nxv2f16( %a) { +; CHECK-LABEL: fcvts_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.s, p0/m, z0.h +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +define @fcvts_nxv4f16( %a) { +; CHECK-LABEL: fcvts_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvt z0.s, p0/m, z0.h +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +define @fcvtd_nxv2f16( %a) { +; CHECK-LABEL: fcvtd_nxv2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.d, p0/m, z0.h +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +define @fcvtd_nxv2f32( %a) { +; CHECK-LABEL: fcvtd_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.d, p0/m, z0.s +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +; +; FP_ROUND +; + +define @fcvth_nxv2f32( %a) { +; CHECK-LABEL: fcvth_nxv2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.h, p0/m, z0.s +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + +define @fcvth_nxv4f32( %a) { +; CHECK-LABEL: fcvth_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvt z0.h, p0/m, z0.s +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + +define @fcvth_nxv2f64( %a) { +; CHECK-LABEL: fcvth_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.h, p0/m, z0.d +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + +define @fcvts_nxv2f64( %a) { +; CHECK-LABEL: fcvts_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.s, p0/m, z0.d +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + ; ; FP_TO_SINT ; diff --git a/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll index fbd9beceaa1f02..6f608c830cfe51 100644 --- a/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll @@ -5,6 +5,152 @@ ; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. 
; WARN-NOT: warning +; FP_EXTEND + +define @fcvts_nxv8f16( %a) { +; CHECK-LABEL: fcvts_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z1.s, z0.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: uunpkhi z2.s, z0.h +; CHECK-NEXT: fcvt z0.s, p0/m, z1.h +; CHECK-NEXT: fcvt z1.s, p0/m, z2.h +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +define @fcvtd_nxv4f16( %a) { +; CHECK-LABEL: fcvtd_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: fcvt z0.d, p0/m, z1.h +; CHECK-NEXT: fcvt z1.d, p0/m, z2.h +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +define @fcvtd_nxv8f16( %a) { +; CHECK-LABEL: fcvtd_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z1.s, z0.h +; CHECK-NEXT: uunpkhi z0.s, z0.h +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpklo z2.d, z1.s +; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: uunpklo z3.d, z0.s +; CHECK-NEXT: uunpkhi z4.d, z0.s +; CHECK-NEXT: fcvt z0.d, p0/m, z2.h +; CHECK-NEXT: fcvt z1.d, p0/m, z1.h +; CHECK-NEXT: fcvt z2.d, p0/m, z3.h +; CHECK-NEXT: fcvt z3.d, p0/m, z4.h +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +define @fcvtd_nxv4f32( %a) { +; CHECK-LABEL: fcvtd_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: fcvt z0.d, p0/m, z1.s +; CHECK-NEXT: fcvt z1.d, p0/m, z2.s +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +define @fcvtd_nxv8f32( %a) { +; CHECK-LABEL: fcvtd_nxv8f32: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z2.d, z0.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpkhi z3.d, z0.s +; CHECK-NEXT: uunpklo z4.d, z1.s +; CHECK-NEXT: uunpkhi z5.d, z1.s +; CHECK-NEXT: fcvt z0.d, p0/m, z2.s +; CHECK-NEXT: fcvt z1.d, p0/m, z3.s +; CHECK-NEXT: fcvt z2.d, p0/m, z4.s +; CHECK-NEXT: fcvt z3.d, p0/m, z5.s +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + +; FP_ROUND + +define @fcvth_nxv8f32( %a) { +; CHECK-LABEL: fcvth_nxv8f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvt z1.h, p0/m, z1.s +; CHECK-NEXT: fcvt z0.h, p0/m, z0.s +; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + +define @fcvth_nxv8f64( %a) { +; CHECK-LABEL: fcvth_nxv8f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z3.h, p0/m, z3.d +; CHECK-NEXT: fcvt z2.h, p0/m, z2.d +; CHECK-NEXT: fcvt z1.h, p0/m, z1.d +; CHECK-NEXT: fcvt z0.h, p0/m, z0.d +; CHECK-NEXT: uzp1 z2.s, z2.s, z3.s +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + +define @fcvth_nxv4f64( %a) { +; CHECK-LABEL: fcvth_nxv4f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z1.h, p0/m, z1.d +; CHECK-NEXT: fcvt z0.h, p0/m, z0.d +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + +define @fcvts_nxv4f64( %a) { +; CHECK-LABEL: fcvts_nxv4f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z1.s, p0/m, z1.d +; CHECK-NEXT: fcvt z0.s, p0/m, z0.d +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + +define @fcvts_nxv8f64( %a) { +; CHECK-LABEL: fcvts_nxv8f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z1.s, p0/m, z1.d +; CHECK-NEXT: fcvt z0.s, p0/m, z0.d +; CHECK-NEXT: fcvt z3.s, p0/m, z3.d +; CHECK-NEXT: fcvt z2.s, p0/m, z2.d +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: uzp1 z1.s, z2.s, z3.s +; CHECK-NEXT: ret + %res = fptrunc %a to + ret 
%res +} + ; FP_TO_SINT ; Split operand @@ -95,3 +241,144 @@ define @fcvtzu_d_nxv4f32( %a) { %res = fptoui %a to ret %res } + +; SINT_TO_FP + +; Split operand +define @scvtf_s_nxv4i64( %a) { +; CHECK-LABEL: scvtf_s_nxv4i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: scvtf z1.s, p0/m, z1.d +; CHECK-NEXT: scvtf z0.s, p0/m, z0.d +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %res = sitofp %a to + ret %res +} + +define @scvtf_h_nxv8i64( %a) { +; CHECK-LABEL: scvtf_h_nxv8i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: scvtf z3.h, p0/m, z3.d +; CHECK-NEXT: scvtf z2.h, p0/m, z2.d +; CHECK-NEXT: scvtf z1.h, p0/m, z1.d +; CHECK-NEXT: scvtf z0.h, p0/m, z0.d +; CHECK-NEXT: uzp1 z2.s, z2.s, z3.s +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h +; CHECK-NEXT: ret + %res = sitofp %a to + ret %res +} + +; Split result +define @scvtf_s_nxv16i8( %a) { +; CHECK-LABEL: scvtf_s_nxv16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sunpklo z1.h, z0.b +; CHECK-NEXT: sunpkhi z0.h, z0.b +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sunpklo z2.s, z1.h +; CHECK-NEXT: sunpkhi z1.s, z1.h +; CHECK-NEXT: sunpklo z3.s, z0.h +; CHECK-NEXT: sunpkhi z4.s, z0.h +; CHECK-NEXT: scvtf z0.s, p0/m, z2.s +; CHECK-NEXT: scvtf z1.s, p0/m, z1.s +; CHECK-NEXT: scvtf z2.s, p0/m, z3.s +; CHECK-NEXT: scvtf z3.s, p0/m, z4.s +; CHECK-NEXT: ret + %res = sitofp %a to + ret %res +} + +define @scvtf_d_nxv4i32( %a) { +; CHECK-LABEL: scvtf_d_nxv4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sunpklo z1.d, z0.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sunpkhi z2.d, z0.s +; CHECK-NEXT: scvtf z0.d, p0/m, z1.d +; CHECK-NEXT: scvtf z1.d, p0/m, z2.d +; CHECK-NEXT: ret + %res = sitofp %a to + ret %res +} + +define @scvtf_d_nxv4i1( %a) { +; CHECK-LABEL: scvtf_d_nxv4i1: +; CHECK: // %bb.0: +; CHECK-NEXT: pfalse p1.b +; CHECK-NEXT: zip1 p3.s, p0.s, p1.s +; CHECK-NEXT: zip2 p0.s, p0.s, p1.s +; CHECK-NEXT: ptrue p2.d +; CHECK-NEXT: mov z0.d, p3/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: scvtf z0.d, p2/m, z0.d +; CHECK-NEXT: scvtf z1.d, p2/m, z1.d +; CHECK-NEXT: ret + %res = sitofp %a to + ret %res +} + +; UINT_TO_FP + +; Split operand +define @ucvtf_s_nxv4i64( %a) { +; CHECK-LABEL: ucvtf_s_nxv4i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ucvtf z1.s, p0/m, z1.d +; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %res = uitofp %a to + ret %res +} + +define @ucvtf_h_nxv8i64( %a) { +; CHECK-LABEL: ucvtf_h_nxv8i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ucvtf z3.h, p0/m, z3.d +; CHECK-NEXT: ucvtf z2.h, p0/m, z2.d +; CHECK-NEXT: ucvtf z1.h, p0/m, z1.d +; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d +; CHECK-NEXT: uzp1 z2.s, z2.s, z3.s +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h +; CHECK-NEXT: ret + %res = uitofp %a to + ret %res +} + +; Split result +define @ucvtf_d_nxv4i32( %a) { +; CHECK-LABEL: ucvtf_d_nxv4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: ucvtf z0.d, p0/m, z1.d +; CHECK-NEXT: ucvtf z1.d, p0/m, z2.d +; CHECK-NEXT: ret + %res = uitofp %a to + ret %res +} + +define @ucvtf_d_nxv4i1( %a) { +; CHECK-LABEL: ucvtf_d_nxv4i1: +; CHECK: // %bb.0: +; CHECK-NEXT: pfalse p1.b +; CHECK-NEXT: zip1 p3.s, p0.s, p1.s +; CHECK-NEXT: zip2 p0.s, p0.s, p1.s +; CHECK-NEXT: ptrue p2.d +; CHECK-NEXT: mov z0.d, p3/z, #1 // =0x1 +; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1 +; 
CHECK-NEXT: ucvtf z0.d, p2/m, z0.d +; CHECK-NEXT: ucvtf z1.d, p2/m, z1.d +; CHECK-NEXT: ret + %res = uitofp %a to + ret %res +} diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir index 3e7c87de0282cb..e5131fd4e1b41e 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir @@ -153,25 +153,17 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 ; CHECK: dead $r7 = frame-setup tMOVr $sp, 14 /* CC::al */, $noreg ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_register $r7 - ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg - ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg - ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3 = tLDRpci %const.0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) - ; CHECK: $lr = t2DLS killed renamable $lr ; CHECK: renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1 ; CHECK: $s4 = VMOVS killed $s0, 14 /* CC::al */, $noreg, implicit killed $q1, implicit-def $q1 + ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r2 ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) - ; CHECK: liveins: $lr, $q1, $r0, $r1, $r2 - ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg - ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg - ; CHECK: MVE_VPST 2, implicit $vpr - ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv12, align 4) - ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1315, align 4) - ; CHECK: renamable $q1 = MVE_VFMAf32 killed renamable $q1, killed renamable $q2, killed renamable $q0, 1, killed renamable $vpr - ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: liveins: $lr, $q1, $r0, $r1 + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.lsr.iv12, align 4) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.lsr.iv1315, align 4) + ; CHECK: renamable $q1 = MVE_VFMAf32 killed renamable $q1, killed renamable $q2, killed renamable $q0, 0, killed $noreg + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.middle.block: ; CHECK: liveins: $q1 ; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s6, renamable $s7, 14 /* CC::al */, $noreg @@ -285,27 +277,18 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 ; CHECK: dead $r7 = frame-setup tMOVr $sp, 14 /* CC::al */, $noreg ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_register $r7 - ; CHECK: renamable $r3, dead $cpsr = tMOVi8 3, 14 /* CC::al */, $noreg - ; CHECK: renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, renamable $r2, 19, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* 
CC::al */, $noreg, $noreg - ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg - ; CHECK: renamable $lr = t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3 = tLDRpci %const.0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool) - ; CHECK: $lr = t2DLS killed renamable $lr ; CHECK: renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1 ; CHECK: renamable $r2, dead $cpsr = tLSRri killed renamable $r2, 2, 14 /* CC::al */, $noreg ; CHECK: $s4 = VMOVS killed $s0, 14 /* CC::al */, $noreg, implicit killed $q1, implicit-def $q1 + ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r2 ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) - ; CHECK: liveins: $lr, $q1, $r0, $r1, $r2 - ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg - ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg - ; CHECK: MVE_VPST 2, implicit $vpr - ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv13, align 4) - ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1416, align 4) - ; CHECK: renamable $q1 = MVE_VFMAf32 killed renamable $q1, killed renamable $q2, killed renamable $q0, 1, killed renamable $vpr - ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: liveins: $lr, $q1, $r0, $r1 + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.lsr.iv13, align 4) + ; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.lsr.iv1416, align 4) + ; CHECK: renamable $q1 = MVE_VFMAf32 killed renamable $q1, killed renamable $q2, killed renamable $q0, 0, killed $noreg + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.middle.block: ; CHECK: liveins: $q1 ; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s6, renamable $s7, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir index 94e3e26c819d66..5bafc295a3eff5 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir @@ -163,17 +163,14 @@ body: | ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r12, killed renamable $r3, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg ; CHECK: $r12 = tMOVr $r0, 14 /* CC::al */, $noreg - ; CHECK: $lr = t2DLS killed renamable $lr - ; CHECK: $r4 = tMOVr $lr, 14 /* CC::al */, $noreg + ; CHECK: $r4 = tMOVr killed $lr, 14 /* CC::al */, $noreg + ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3 ; CHECK: bb.1.do.body.i: ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) - ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r12 - ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg - ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg - ; CHECK: MVE_VPST 4, implicit $vpr - ; CHECK: renamable $r12, renamable $q1 = MVE_VLDRWU32_post killed renamable $r12, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.0.i2, align 4) - ; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VADDf32 killed renamable $q0, killed renamable $q1, 1, killed 
renamable $vpr, killed renamable $q0 - ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1 + ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r4, $r12 + ; CHECK: renamable $r12, renamable $q1 = MVE_VLDRWU32_post killed renamable $r12, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.0.i2, align 4) + ; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VADDf32 killed renamable $q0, killed renamable $q1, 0, killed $noreg, killed renamable $q0 + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1 ; CHECK: bb.2.arm_mean_f32_mve.exit: ; CHECK: successors: %bb.3(0x80000000) ; CHECK: liveins: $q0, $r0, $r1, $r2, $r4 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll index 1404075dce9014..12c6858c961b5d 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll @@ -17,16 +17,13 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float ; CHECK-NEXT: add.w lr, r12, r3, lsr #2 ; CHECK-NEXT: mov r3, r1 ; CHECK-NEXT: mov r12, r0 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: mov r4, lr +; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB0_1: @ %do.body.i ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vctp.32 r3 -; CHECK-NEXT: subs r3, #4 -; CHECK-NEXT: vpstt -; CHECK-NEXT: vldrwt.u32 q1, [r12], #16 -; CHECK-NEXT: vaddt.f32 q0, q0, q1 -; CHECK-NEXT: le lr, .LBB0_1 +; CHECK-NEXT: vldrw.u32 q1, [r12], #16 +; CHECK-NEXT: vadd.f32 q0, q0, q1 +; CHECK-NEXT: letp lr, .LBB0_1 ; CHECK-NEXT: @ %bb.2: @ %arm_mean_f32_mve.exit ; CHECK-NEXT: vmov s4, r1 ; CHECK-NEXT: vadd.f32 s0, s3, s3 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir index ea3589f48fdb7b..005524b8788944 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir @@ -117,32 +117,21 @@ body: | ; CHECK: bb.1.vector.ph: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $r0, $r1, $r2, $r3 - ; CHECK: renamable $r12 = t2MOVi 3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $lr = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r12 = nuw t2ADDrs killed renamable $r12, renamable $r3, 11, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r12 = t2BICri killed renamable $r12, 3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r12 = t2MOVr killed $r3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $lr = t2DLS killed renamable $lr ; CHECK: renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = t2LSRri killed renamable $r12, 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r12 ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) - ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r12 + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 ; CHECK: renamable $r4, dead $cpsr = tADDrr renamable $r1, renamable $r3, 14 /* CC::al */, $noreg - ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg - ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load 4 from %ir.scevgep45, align 1) + ; 
CHECK: renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 0, $noreg :: (load 4 from %ir.scevgep45, align 1) ; CHECK: renamable $r4, dead $cpsr = tADDrr renamable $r2, renamable $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg - ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg - ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load 4 from %ir.scevgep23, align 1) + ; CHECK: renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 0, $noreg :: (load 4 from %ir.scevgep23, align 1) ; CHECK: renamable $q0 = nuw nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 4) - ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store 16 into %ir.lsr.iv1, align 4) + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc bb.0.entry: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir index 0295acb67962d1..f7e0e699c75a1e 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir @@ -117,32 +117,21 @@ body: | ; CHECK: bb.1.vector.ph: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: liveins: $r0, $r1, $r2, $r3 - ; CHECK: renamable $r12 = t2MOVi 3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $lr = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r12 = nuw t2ADDrs killed renamable $r12, renamable $r3, 11, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r12 = t2BICri killed renamable $r12, 3, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg - ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $lr = t2DLS killed renamable $lr ; CHECK: $r12 = t2MOVr killed $r3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg ; CHECK: renamable $r12 = t2LSRri killed renamable $r12, 1, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r12 ; CHECK: bb.2.vector.body: ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000) - ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r12 + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 ; CHECK: renamable $r4, dead $cpsr = tADDrr renamable $r1, renamable $r3, 14 /* CC::al */, $noreg - ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg - ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load 4 from %ir.scevgep45, align 1) + ; CHECK: renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 0, $noreg :: (load 4 from %ir.scevgep45, align 1) ; CHECK: renamable $r4, dead $cpsr = tADDrr renamable $r2, renamable $r3, 14 /* CC::al */, $noreg ; CHECK: renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg - ; CHECK: renamable $r12 = t2SUBri 
killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg - ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load 4 from %ir.scevgep23, align 1) + ; CHECK: renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 0, $noreg :: (load 4 from %ir.scevgep23, align 1) ; CHECK: renamable $q0 = nuw nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 - ; CHECK: MVE_VPST 8, implicit $vpr - ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 4) - ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2 + ; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store 16 into %ir.lsr.iv1, align 4) + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: ; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc bb.0.entry: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll index b5cac5d6a3cf86..a0cdb822b370fd 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll @@ -451,9 +451,9 @@ define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(i8* nocapture read ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r6, lsr #2 ; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vdup.32 q0, r3 ; CHECK-NEXT: vmov.32 q0[0], r12 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB6_5: @ %vector.body46 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 @@ -686,8 +686,8 @@ define i32 @wrongop(%struct.date* nocapture readonly %pd) { ; CHECK-NEXT: mla r2, r4, r3, r2 ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: vdup.32 q0, r3 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vmov.32 q0[0], r0 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB8_6: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r1 diff --git a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll index d364eb97fff72a..f3db06e571caf4 100644 --- a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll @@ -1156,8 +1156,8 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, half* noca ; CHECK-NEXT: @ %bb.5: @ %for.body.preheader ; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 ; CHECK-NEXT: ldr.w lr, [sp] @ 4-byte Reload -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB16_6: @ %for.body ; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll index a43f564951e93d..6f9b001ea992be 100644 --- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -1116,8 +1116,8 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc ; CHECK-NEXT: @ %bb.5: @ %for.body.preheader ; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1 ; CHECK-NEXT: ldr.w lr, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB16_6: @ %for.body ; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: 
Depth=2 @@ -1436,9 +1436,9 @@ define arm_aapcs_vfpcc void @arm_biquad_cascade_stereo_df2T_f32(%struct.arm_biqu ; CHECK-NEXT: mov r6, r2 ; CHECK-NEXT: vmov.f32 s6, s12 ; CHECK-NEXT: vmov.f32 s10, s14 -; CHECK-NEXT: dls lr, r3 ; CHECK-NEXT: vmov.f32 s7, s12 ; CHECK-NEXT: vmov.f32 s11, s14 +; CHECK-NEXT: dls lr, r3 ; CHECK-NEXT: .LBB17_3: @ Parent Loop BB17_2 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: vldrw.u32 q4, [r1, q0, uxtw #2] @@ -1589,8 +1589,8 @@ define arm_aapcs_vfpcc void @fms(float* nocapture readonly %pSrc1, float* nocapt ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB18_3 Depth 2 ; CHECK-NEXT: ldr r4, [r2] -; CHECK-NEXT: dls lr, r5 ; CHECK-NEXT: vdup.32 q0, r4 +; CHECK-NEXT: dls lr, r5 ; CHECK-NEXT: .LBB18_3: @ %while.body ; CHECK-NEXT: @ Parent Loop BB18_2 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 diff --git a/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll b/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll index 86cbec661f1f5c..68ebeaa830cb22 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll @@ -265,9 +265,9 @@ define arm_aapcs_vfpcc void @fmss1(float* nocapture readonly %x, float* nocaptur ; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB4_1: @ %vector.ph ; CHECK-NEXT: vmov r4, s0 -; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: eor r12, r4, #-2147483648 ; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB4_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r4, #4 @@ -529,9 +529,9 @@ define arm_aapcs_vfpcc void @fms1(float* nocapture readonly %x, float* nocapture ; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB8_1: @ %vector.ph ; CHECK-NEXT: vmov r4, s0 -; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: eor r12, r4, #-2147483648 ; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB8_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r4, #4 diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll index bba302d7fbcc0d..d158c85e401b8a 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll @@ -709,12 +709,12 @@ define dso_local void @arm_mat_mult_q15(i16* noalias nocapture readonly %A, i16* ; CHECK-NEXT: @ in Loop: Header=BB10_8 Depth=2 ; CHECK-NEXT: ldr r0, [sp, #112] ; CHECK-NEXT: sub.w lr, r11, r5 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: mla r3, r0, r5, r1 ; CHECK-NEXT: add r5, r9 ; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: add.w r5, r0, r5, lsl #1 ; CHECK-NEXT: add.w r3, r6, r3, lsl #1 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB10_14: @ %for.body8.us.us ; CHECK-NEXT: @ Parent Loop BB10_5 Depth=1 ; CHECK-NEXT: @ Parent Loop BB10_8 Depth=2 diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll index bfc64b8c8e261c..030fb3b91cf8fa 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll @@ -556,8 +556,8 @@ define void @ptr_iv_v8f16_mult(half* noalias nocapture readonly %A, half* noalia ; CHECK-NEXT: vmov.f16 r1, s0 ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: adr r2, .LCPI9_1 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB9_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop 
Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q2, [r0, q0, uxtw #1] diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll index 4054b75edd0ed0..a4a67512b7199f 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll @@ -11,9 +11,9 @@ define dso_local void @mve_gather_qi_wb(i32* noalias nocapture readonly %A, i32* ; CHECK-NEXT: vldrw.u32 q0, [r0] ; CHECK-NEXT: movw lr, #1250 ; CHECK-NEXT: vmov.i32 q1, #0x0 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vadd.i32 q0, q0, r1 ; CHECK-NEXT: adds r1, r3, #4 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB0_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r3 @@ -231,17 +231,11 @@ define void @justoffsets(i8* noalias nocapture readonly %r, i8* noalias nocaptur ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: beq.w .LBB3_3 ; CHECK-NEXT: @ %bb.1: @ %vector.ph -; CHECK-NEXT: adds r3, r2, #3 ; CHECK-NEXT: adr r7, .LCPI3_5 -; CHECK-NEXT: bic r3, r3, #3 ; CHECK-NEXT: vmov.i32 q0, #0x8000 -; CHECK-NEXT: sub.w r12, r3, #4 -; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: adr r6, .LCPI3_4 ; CHECK-NEXT: adr r5, .LCPI3_3 -; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: adr r4, .LCPI3_2 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vstrw.32 q0, [sp, #160] @ 16-byte Spill ; CHECK-NEXT: vldrw.u32 q0, [r7] ; CHECK-NEXT: adr.w r8, .LCPI3_1 @@ -274,22 +268,18 @@ define void @justoffsets(i8* noalias nocapture readonly %r, i8* noalias nocaptur ; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill ; CHECK-NEXT: vldrw.u32 q0, [r3] ; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill +; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [sp, #192] @ 16-byte Reload -; CHECK-NEXT: vctp.32 r2 -; CHECK-NEXT: vpst -; CHECK-NEXT: vldrbt.u32 q4, [r0, q0] +; CHECK-NEXT: vldrb.u32 q4, [r0, q0] ; CHECK-NEXT: vldrw.u32 q0, [sp, #176] @ 16-byte Reload -; CHECK-NEXT: vpst -; CHECK-NEXT: vldrbt.u32 q7, [r0, q0] +; CHECK-NEXT: vldrb.u32 q7, [r0, q0] ; CHECK-NEXT: vldrw.u32 q0, [sp, #144] @ 16-byte Reload ; CHECK-NEXT: vldrw.u32 q5, [sp, #112] @ 16-byte Reload -; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vmul.i32 q6, q7, q0 ; CHECK-NEXT: vldrw.u32 q0, [sp, #128] @ 16-byte Reload -; CHECK-NEXT: vpst -; CHECK-NEXT: vldrbt.u32 q1, [r0, q5] +; CHECK-NEXT: vldrb.u32 q1, [r0, q5] ; CHECK-NEXT: vldrw.u32 q2, [sp, #80] @ 16-byte Reload ; CHECK-NEXT: vmul.i32 q3, q4, q0 ; CHECK-NEXT: vldrw.u32 q0, [sp, #96] @ 16-byte Reload @@ -320,14 +310,12 @@ define void @justoffsets(i8* noalias nocapture readonly %r, i8* noalias nocaptur ; CHECK-NEXT: vadd.i32 q1, q1, q0 ; CHECK-NEXT: vldrw.u32 q0, [sp, #192] @ 16-byte Reload ; CHECK-NEXT: vshr.u32 q1, q1, #16 -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrbt.32 q1, [r1, q0] +; CHECK-NEXT: vstrb.32 q1, [r1, q0] ; CHECK-NEXT: vldrw.u32 q0, [sp, #176] @ 16-byte Reload -; CHECK-NEXT: vpstt -; CHECK-NEXT: vstrbt.32 q2, [r1, q0] -; CHECK-NEXT: vstrbt.32 q6, [r1, q5] +; CHECK-NEXT: vstrb.32 q2, [r1, q0] +; CHECK-NEXT: vstrb.32 q6, [r1, q5] ; CHECK-NEXT: adds r1, #12 -; CHECK-NEXT: le lr, .LBB3_2 +; CHECK-NEXT: letp lr, .LBB3_2 ; CHECK-NEXT: .LBB3_3: @ %for.cond.cleanup ; CHECK-NEXT: add sp, #216 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll index 0f3e893fd8017a..d67ccd9393cc40 100644 --- 
a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll @@ -257,13 +257,13 @@ define i8* @test(i8* nocapture readonly %input_row, i8* nocapture readonly %inpu ; CHECK-NEXT: ldr r3, [sp, #64] ; CHECK-NEXT: mov r6, r12 ; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: mov r10, r12 ; CHECK-NEXT: mla r7, r11, r3, r1 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: ldrd r4, r3, [sp] @ 8-byte Folded Reload ; CHECK-NEXT: mov r8, r12 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB2_7: @ %for.body24 ; CHECK-NEXT: @ Parent Loop BB2_5 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 @@ -425,13 +425,13 @@ define i8* @test_optsize(i8* nocapture readonly %input_row, i8* nocapture readon ; CHECK-NEXT: ldr r3, [sp, #64] ; CHECK-NEXT: mov r6, r12 ; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: mov r10, r12 ; CHECK-NEXT: mla r7, r11, r3, r1 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: ldrd r4, r3, [sp] @ 8-byte Folded Reload ; CHECK-NEXT: mov r8, r12 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB3_5: @ %for.body24 ; CHECK-NEXT: @ Parent Loop BB3_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 @@ -735,13 +735,13 @@ define i8* @signext(i8* %input_row, i8* %input_col, i16 zeroext %output_ch, i16 ; CHECK-NEXT: ldr.w r11, [sp, #88] ; CHECK-NEXT: mov r6, r12 ; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: dlstp.16 lr, r11 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: mov r10, r12 ; CHECK-NEXT: mla r3, r9, r11, r0 ; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: ldrd r7, r0, [sp] @ 8-byte Folded Reload ; CHECK-NEXT: mov r8, r12 +; CHECK-NEXT: dlstp.16 lr, r11 ; CHECK-NEXT: .LBB5_7: @ %for.body24 ; CHECK-NEXT: @ Parent Loop BB5_5 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 @@ -907,13 +907,13 @@ define i8* @signext_optsize(i8* %input_row, i8* %input_col, i16 zeroext %output_ ; CHECK-NEXT: ldr.w r11, [sp, #88] ; CHECK-NEXT: mov r6, r12 ; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: dlstp.16 lr, r11 ; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: mov r10, r12 ; CHECK-NEXT: mla r3, r9, r11, r0 ; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: ldrd r7, r0, [sp] @ 8-byte Folded Reload ; CHECK-NEXT: mov r8, r12 +; CHECK-NEXT: dlstp.16 lr, r11 ; CHECK-NEXT: .LBB6_5: @ %for.body24 ; CHECK-NEXT: @ Parent Loop BB6_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 @@ -1120,7 +1120,6 @@ define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cf ; CHECK-NEXT: ldr.w r1, [r1, r10, lsl #2] ; CHECK-NEXT: ldrd r6, r7, [r0, #32] ; CHECK-NEXT: ldr.w r3, [r3, r10, lsl #2] -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: add.w r6, r6, r2, lsl #2 ; CHECK-NEXT: add.w r12, r12, r1, lsl #2 ; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload @@ -1129,6 +1128,7 @@ define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cf ; CHECK-NEXT: add.w r1, r2, r11, lsl #2 ; CHECK-NEXT: add.w r8, r1, r11, lsl #2 ; CHECK-NEXT: add.w r9, r8, r11, lsl #2 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB7_7: @ Parent Loop BB7_3 Depth=1 ; CHECK-NEXT: @ Parent Loop BB7_6 Depth=2 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=3 diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll b/llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll index 12561d560309a0..35e02faa14e018 
100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-threshold.ll @@ -187,8 +187,8 @@ define arm_aapcs_vfpcc void @thresh_f32(float* %data, i16 zeroext %N, float %T) ; CHECK-NEXT: add.w lr, r2, r1, lsr #2 ; CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: eor r2, r1, #-2147483648 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0] @@ -480,8 +480,8 @@ define arm_aapcs_vfpcc void @thresh_rev_f32(float* %data, i16 zeroext %N, float ; CHECK-NEXT: add.w lr, r2, r1, lsr #2 ; CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: eor r2, r1, #-2147483648 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB8_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [r0] diff --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll index f586857f289f75..fdaea92c4329ce 100644 --- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll @@ -36,8 +36,8 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: vldrw.u32 q0, [r4] ; CHECK-NEXT: vmvn.i32 q1, #0x80000000 ; CHECK-NEXT: mov.w r10, #-1 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: str r3, [sp] @ 4-byte Spill +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB0_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrd r4, r5, [r0] @@ -256,10 +256,10 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: adr r7, .LCPI1_1 ; CHECK-NEXT: add.w r12, r0, r3, lsl #2 ; CHECK-NEXT: vldrw.u32 q1, [r7] -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: str r3, [sp] @ 4-byte Spill ; CHECK-NEXT: mov.w r3, #-1 ; CHECK-NEXT: mvn r9, #-2147483648 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB1_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q2, [r0], #16 @@ -544,8 +544,8 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: vdup.32 q1, r7 ; CHECK-NEXT: mov.w r12, #-1 ; CHECK-NEXT: mvn r8, #-2147483648 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload @@ -773,8 +773,8 @@ define arm_aapcs_vfpcc void @usatmul_2_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: add.w r11, r1, r5, lsl #2 ; CHECK-NEXT: add.w lr, r6, r7, lsr #1 ; CHECK-NEXT: add.w r12, r0, r5, lsl #2 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: str r5, [sp] @ 4-byte Spill +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB3_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrd r4, r9, [r0] @@ -1617,8 +1617,8 @@ define arm_aapcs_vfpcc void @ssatmul_8t_q15(i16* nocapture readonly %pSrcA, i16* ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: vdup.32 q1, r12 ; CHECK-NEXT: vmov.i8 q3, #0xff -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB9_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q5, [sp] @ 16-byte Reload @@ -2842,7 +2842,6 @@ define arm_aapcs_vfpcc void @ssatmul_16t_q7(i8* nocapture readonly %pSrcA, i8* n ; CHECK-NEXT: vmov.i8 q2, #0x0 ; CHECK-NEXT: add.w lr, lr, r12, lsr #4 ; 
CHECK-NEXT: sub.w r12, r3, #1 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vstrw.32 q0, [sp, #32] @ 16-byte Spill ; CHECK-NEXT: vldrw.u32 q0, [r4] ; CHECK-NEXT: adr r4, .LCPI18_2 @@ -2854,6 +2853,7 @@ define arm_aapcs_vfpcc void @ssatmul_16t_q7(i8* nocapture readonly %pSrcA, i8* n ; CHECK-NEXT: vmov.i8 q3, #0xff ; CHECK-NEXT: vldrw.u32 q6, [r4] ; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB18_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [sp, #32] @ 16-byte Reload @@ -3142,7 +3142,6 @@ define arm_aapcs_vfpcc void @ssatmul_16ti_q7(i8* nocapture readonly %pSrcA, i8* ; CHECK-NEXT: vmov.i8 q2, #0x0 ; CHECK-NEXT: add.w lr, lr, r12, lsr #4 ; CHECK-NEXT: sub.w r12, r3, #1 -; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: vstrw.32 q0, [sp, #32] @ 16-byte Spill ; CHECK-NEXT: vldrw.u32 q0, [r4] ; CHECK-NEXT: adr r4, .LCPI19_2 @@ -3154,6 +3153,7 @@ define arm_aapcs_vfpcc void @ssatmul_16ti_q7(i8* nocapture readonly %pSrcA, i8* ; CHECK-NEXT: vmov.i8 q3, #0xff ; CHECK-NEXT: vldrw.u32 q6, [r4] ; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB19_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q4, [sp, #32] @ 16-byte Reload diff --git a/llvm/test/CodeGen/WinCFGuard/cfguard-giats.ll b/llvm/test/CodeGen/WinCFGuard/cfguard-giats.ll new file mode 100644 index 00000000000000..0ac436cc6add59 --- /dev/null +++ b/llvm/test/CodeGen/WinCFGuard/cfguard-giats.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc | FileCheck %s +; Control Flow Guard is currently only available on Windows + +declare dllimport i32 @target_func() + +; Test address-taken functions from imported DLLs are added to the +; Guard Address-Taken IAT Entry table (.giats). 
+define i32 @func_cf_giats() { +entry: + %func_ptr = alloca i32 ()*, align 8 + store i32 ()* @target_func, i32 ()** %func_ptr, align 8 + %0 = load i32 ()*, i32 ()** %func_ptr, align 8 + %1 = call i32 %0() + ret i32 %1 +} + +!llvm.module.flags = !{!0} +!0 = !{i32 2, !"cfguard", i32 2} + +; CHECK-LABEL: .section .giats$y,"dr" +; CHECK-NEXT: .symidx __imp_target_func +; CHECK-NOT: .symidx \ No newline at end of file diff --git a/llvm/tools/llvm-readobj/COFFDumper.cpp b/llvm/tools/llvm-readobj/COFFDumper.cpp index 22e27b3e5a29e1..b4fb2e52cb1996 100644 --- a/llvm/tools/llvm-readobj/COFFDumper.cpp +++ b/llvm/tools/llvm-readobj/COFFDumper.cpp @@ -67,6 +67,8 @@ struct LoadConfigTables { uint32_t GuardFlags = 0; uint64_t GuardFidTableVA = 0; uint64_t GuardFidTableCount = 0; + uint64_t GuardIatTableVA = 0; + uint64_t GuardIatTableCount = 0; uint64_t GuardLJmpTableVA = 0; uint64_t GuardLJmpTableCount = 0; }; @@ -804,6 +806,11 @@ void COFFDumper::printCOFFLoadConfig() { } } + if (Tables.GuardIatTableVA) { + ListScope LS(W, "GuardIatTable"); + printRVATable(Tables.GuardIatTableVA, Tables.GuardIatTableCount, 4); + } + if (Tables.GuardLJmpTableVA) { ListScope LS(W, "GuardLJmpTable"); printRVATable(Tables.GuardLJmpTableVA, Tables.GuardLJmpTableCount, 4); @@ -888,6 +895,9 @@ void COFFDumper::printCOFFLoadConfig(const T *Conf, LoadConfigTables &Tables) { Conf->GuardRFVerifyStackPointerFunctionPointer); W.printHex("HotPatchTableOffset", Conf->HotPatchTableOffset); + Tables.GuardIatTableVA = Conf->GuardAddressTakenIatEntryTable; + Tables.GuardIatTableCount = Conf->GuardAddressTakenIatEntryCount; + Tables.GuardLJmpTableVA = Conf->GuardLongJumpTargetTable; Tables.GuardLJmpTableCount = Conf->GuardLongJumpTargetCount; } diff --git a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp index ff33495f22711f..e5ffc21fb66466 100644 --- a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp +++ b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp @@ -1251,4 +1251,36 @@ TEST_F(ScalarEvolutionsTest, SCEVgetExitLimitForGuardedLoop) { }); } +TEST_F(ScalarEvolutionsTest, ImpliedViaAddRecStart) { + LLVMContext C; + SMDiagnostic Err; + std::unique_ptr M = parseAssemblyString( + "define void @foo(i32* %p) { " + "entry: " + " %x = load i32, i32* %p, !range !0 " + " br label %loop " + "loop: " + " %iv = phi i32 [ %x, %entry], [%iv.next, %backedge] " + " %ne.check = icmp ne i32 %iv, 0 " + " br i1 %ne.check, label %backedge, label %exit " + "backedge: " + " %iv.next = add i32 %iv, -1 " + " br label %loop " + "exit:" + " ret void " + "} " + "!0 = !{i32 0, i32 2147483647}", + Err, C); + + ASSERT_TRUE(M && "Could not parse module?"); + ASSERT_TRUE(!verifyModule(*M) && "Must have been well formed!"); + + runWithSE(*M, "foo", [](Function &F, LoopInfo &LI, ScalarEvolution &SE) { + auto *X = SE.getSCEV(getInstructionByName(F, "x")); + auto *Context = getInstructionByName(F, "iv.next"); + EXPECT_TRUE(SE.isKnownPredicateAt(ICmpInst::ICMP_NE, X, + SE.getZero(X->getType()), Context)); + }); +} + } // end namespace llvm diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index a7f8c31e226437..e47dafc9bf52b4 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -326,9 +326,9 @@ struct LinalgTilingOptions { /// Set the `tileSizeComputationFunction` to return the values `ts`. The /// values must not fold away when tiling. 
Otherwise, use a more robust
   /// `tileSizeComputationFunction`.
-  LinalgTilingOptions &setTileSizes(ValueRange ts) {
-    tileSizeComputationFunction = [&](OpBuilder &, Operation *) {
-      return SmallVector<Value, 4>(ts.begin(), ts.end());
+  LinalgTilingOptions &setTileSizes(SmallVector<Value, 4> ts) {
+    tileSizeComputationFunction = [=](OpBuilder &, Operation *) {
+      return ts;
     };
     return *this;
   }
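Editor's note: the `setTileSizes` hunk above replaces a `[&]` capture of a `ValueRange` parameter with a by-value parameter captured by copy. The lambda is stored in `tileSizeComputationFunction` and only runs later, after `setTileSizes` has returned, so the reference capture left the stored callback pointing at a dead argument. A reduced, MLIR-independent sketch of the hazard and the fix (all names below are illustrative, not from the patch):

#include <functional>
#include <vector>

using Callback = std::function<std::vector<int>()>;

Callback makeCallbackBad(const std::vector<int> &values) {
  // BUG: 'values' refers to the caller's argument, which may be a temporary;
  // by the time the stored callback runs, the reference can dangle.
  return [&]() { return std::vector<int>(values.begin(), values.end()); };
}

Callback makeCallbackGood(std::vector<int> values) {
  // Fix: take the parameter by value and capture the copy, so the stored
  // callback owns the data it returns, mirroring the [=] capture in the diff.
  return [=]() { return values; };
}

Taking the parameter by value and capturing with `[=]` gives the stored callback its own copy of the tile sizes, at the cost of one copy per `setTileSizes` call.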