diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp index 1fc0e0348d0939..7de1dc53596e97 100644 --- a/clang-tools-extra/clangd/XRefs.cpp +++ b/clang-tools-extra/clangd/XRefs.cpp @@ -27,8 +27,12 @@ #include "clang/AST/Attrs.inc" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/ExprCXX.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/StmtCXX.h" #include "clang/AST/Type.h" #include "clang/Basic/CharInfo.h" #include "clang/Basic/LLVM.h" @@ -45,6 +49,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" @@ -712,35 +717,304 @@ findRefs(const std::vector &Decls, ParsedAST &AST) { return std::move(RefFinder).take(); } +const Stmt *getFunctionBody(DynTypedNode N) { + if (const auto *FD = N.get()) + return FD->getBody(); + if (const auto *FD = N.get()) + return FD->getBody(); + if (const auto *FD = N.get()) + return FD->getBody(); + if (const auto *FD = N.get()) + return FD->getBody(); + return nullptr; +} + +const Stmt *getLoopBody(DynTypedNode N) { + if (const auto *LS = N.get()) + return LS->getBody(); + if (const auto *LS = N.get()) + return LS->getBody(); + if (const auto *LS = N.get()) + return LS->getBody(); + if (const auto *LS = N.get()) + return LS->getBody(); + return nullptr; +} + +// AST traversal to highlight control flow statements under some root. +// Once we hit further control flow we prune the tree (or at least restrict +// what we highlight) so we capture e.g. breaks from the outer loop only. +class FindControlFlow : public RecursiveASTVisitor { + // Types of control-flow statements we might highlight. 
+ enum Target { + Break = 1, + Continue = 2, + Return = 4, + Case = 8, + Throw = 16, + Goto = 32, + All = Break | Continue | Return | Case | Throw | Goto, + }; + int Ignore = 0; // bitmask of Target - what are we *not* highlighting? + SourceRange Bounds; // Half-open, restricts reported targets. + std::vector &Result; + const SourceManager &SM; + + // Masks out targets for a traversal into D. + // Traverses the subtree using Delegate() if any targets remain. + template + bool filterAndTraverse(DynTypedNode D, const Func &Delegate) { + auto RestoreIgnore = llvm::make_scope_exit( + [OldIgnore(Ignore), this] { Ignore = OldIgnore; }); + if (getFunctionBody(D)) + Ignore = All; + else if (getLoopBody(D)) + Ignore |= Continue | Break; + else if (D.get()) + Ignore |= Break | Case; + // Prune tree if we're not looking for anything. + return (Ignore == All) ? true : Delegate(); + } + + void found(Target T, SourceLocation Loc) { + if (T & Ignore) + return; + if (SM.isBeforeInTranslationUnit(Loc, Bounds.getBegin()) || + SM.isBeforeInTranslationUnit(Bounds.getEnd(), Loc)) + return; + Result.push_back(Loc); + } + +public: + FindControlFlow(SourceRange Bounds, std::vector &Result, + const SourceManager &SM) + : Bounds(Bounds), Result(Result), SM(SM) {} + + // When traversing function or loops, limit targets to those that still + // refer to the original root. + bool TraverseDecl(Decl *D) { + return !D || filterAndTraverse(DynTypedNode::create(*D), [&] { + return RecursiveASTVisitor::TraverseDecl(D); + }); + } + bool TraverseStmt(Stmt *S) { + return !S || filterAndTraverse(DynTypedNode::create(*S), [&] { + return RecursiveASTVisitor::TraverseStmt(S); + }); + } + + // Add leaves that we found and want. 
+ bool VisitReturnStmt(ReturnStmt *R) { + found(Return, R->getReturnLoc()); + return true; + } + bool VisitBreakStmt(BreakStmt *B) { + found(Break, B->getBreakLoc()); + return true; + } + bool VisitContinueStmt(ContinueStmt *C) { + found(Continue, C->getContinueLoc()); + return true; + } + bool VisitSwitchCase(SwitchCase *C) { + found(Case, C->getKeywordLoc()); + return true; + } + bool VisitCXXThrowExpr(CXXThrowExpr *T) { + found(Throw, T->getThrowLoc()); + return true; + } + bool VisitGotoStmt(GotoStmt *G) { + // Goto is interesting if its target is outside the root. + if (const auto *LD = G->getLabel()) { + if (SM.isBeforeInTranslationUnit(LD->getLocation(), Bounds.getBegin()) || + SM.isBeforeInTranslationUnit(Bounds.getEnd(), LD->getLocation())) + found(Goto, G->getGotoLoc()); + } + return true; + } +}; + +// Given a location within a switch statement, return the half-open range that +// covers the case it's contained in. +// We treat `case X: case Y: ...` as one case, and assume no other fallthrough. +SourceRange findCaseBounds(const SwitchStmt &Switch, SourceLocation Loc, + const SourceManager &SM) { + // Cases are not stored in order, sort them first. + // (In fact they seem to be stored in reverse order, don't rely on this) + std::vector Cases; + for (const SwitchCase *Case = Switch.getSwitchCaseList(); Case; + Case = Case->getNextSwitchCase()) + Cases.push_back(Case); + llvm::sort(Cases, [&](const SwitchCase *L, const SwitchCase *R) { + return SM.isBeforeInTranslationUnit(L->getKeywordLoc(), R->getKeywordLoc()); + }); + + // Find the first case after the target location, the end of our range. + auto CaseAfter = llvm::partition_point(Cases, [&](const SwitchCase *C) { + return !SM.isBeforeInTranslationUnit(Loc, C->getKeywordLoc()); + }); + SourceLocation End = CaseAfter == Cases.end() ? Switch.getEndLoc() + : (*CaseAfter)->getKeywordLoc(); + + // Our target can be before the first case - cases are optional! 
+  if (CaseAfter == Cases.begin())
+    return SourceRange(Switch.getBeginLoc(), End);
+  // The start of our range is usually the previous case, but...
+  auto CaseBefore = std::prev(CaseAfter);
+  // ... rewind CaseBefore to the first in a `case A: case B: ...` sequence.
+  while (CaseBefore != Cases.begin() &&
+         (*std::prev(CaseBefore))->getSubStmt() == *CaseBefore)
+    --CaseBefore;
+  return SourceRange((*CaseBefore)->getKeywordLoc(), End);
+}
+
+// Returns the locations of control flow statements related to N. e.g.:
+// for => branches: break/continue/return/throw
+// break => controlling loop (for/while/do), and its related control flow
+// return => all returns/throws from the same function
+// When an inner block is selected, we include branches bound to outer blocks
+// as these are exits from the inner block. e.g. return in a for loop.
+// FIXME: We don't analyze catch blocks, throw is treated the same as return.
+std::vector<SourceLocation> relatedControlFlow(const SelectionTree::Node &N) {
+  const SourceManager &SM =
+      N.getDeclContext().getParentASTContext().getSourceManager();
+  std::vector<SourceLocation> Result;
+
+  // First, check if we're at a node that can resolve to a root.
+  enum class Cur { None, Break, Continue, Return, Case, Throw } Cursor;
+  if (N.ASTNode.get<BreakStmt>()) {
+    Cursor = Cur::Break;
+  } else if (N.ASTNode.get<ContinueStmt>()) {
+    Cursor = Cur::Continue;
+  } else if (N.ASTNode.get<ReturnStmt>()) {
+    Cursor = Cur::Return;
+  } else if (N.ASTNode.get<CXXThrowExpr>()) {
+    Cursor = Cur::Throw;
+  } else if (N.ASTNode.get<SwitchCase>()) {
+    Cursor = Cur::Case;
+  } else if (const GotoStmt *GS = N.ASTNode.get<GotoStmt>()) {
+    // We don't know what root to associate with, but highlight the goto/label.
+    Result.push_back(GS->getGotoLoc());
+    if (const auto *LD = GS->getLabel())
+      Result.push_back(LD->getLocation());
+    Cursor = Cur::None;
+  } else {
+    Cursor = Cur::None;
+  }
+
+  const Stmt *Root = nullptr; // Loop or function body to traverse.
+ SourceRange Bounds; + // Look up the tree for a root (or just at this node if we didn't find a leaf) + for (const auto *P = &N; P; P = P->Parent) { + // return associates with enclosing function + if (const Stmt *FunctionBody = getFunctionBody(P->ASTNode)) { + if (Cursor == Cur::Return || Cursor == Cur::Throw) { + Root = FunctionBody; + } + break; // other leaves don't cross functions. + } + // break/continue associate with enclosing loop. + if (const Stmt *LoopBody = getLoopBody(P->ASTNode)) { + if (Cursor == Cur::None || Cursor == Cur::Break || + Cursor == Cur::Continue) { + Root = LoopBody; + // Highlight the loop keyword itself. + // FIXME: for do-while, this only covers the `do`.. + Result.push_back(P->ASTNode.getSourceRange().getBegin()); + break; + } + } + // For switches, users think of case statements as control flow blocks. + // We highlight only occurrences surrounded by the same case. + // We don't detect fallthrough (other than 'case X, case Y'). + if (const auto *SS = P->ASTNode.get()) { + if (Cursor == Cur::Break || Cursor == Cur::Case) { + Result.push_back(SS->getSwitchLoc()); // Highlight the switch. + Root = SS->getBody(); + // Limit to enclosing case, if there is one. + Bounds = findCaseBounds(*SS, N.ASTNode.getSourceRange().getBegin(), SM); + break; + } + } + // If we didn't start at some interesting node, we're done. 
+ if (Cursor == Cur::None) + break; + } + if (Root) { + if (!Bounds.isValid()) + Bounds = Root->getSourceRange(); + FindControlFlow(Bounds, Result, SM).TraverseStmt(const_cast(Root)); + } + return Result; +} + +DocumentHighlight toHighlight(const ReferenceFinder::Reference &Ref, + const SourceManager &SM) { + DocumentHighlight DH; + DH.range = Ref.range(SM); + if (Ref.Role & index::SymbolRoleSet(index::SymbolRole::Write)) + DH.kind = DocumentHighlightKind::Write; + else if (Ref.Role & index::SymbolRoleSet(index::SymbolRole::Read)) + DH.kind = DocumentHighlightKind::Read; + else + DH.kind = DocumentHighlightKind::Text; + return DH; +} + +llvm::Optional toHighlight(SourceLocation Loc, + const syntax::TokenBuffer &TB) { + Loc = TB.sourceManager().getFileLoc(Loc); + if (const auto *Tok = TB.spelledTokenAt(Loc)) { + DocumentHighlight Result; + Result.range = halfOpenToRange( + TB.sourceManager(), + CharSourceRange::getCharRange(Tok->location(), Tok->endLocation())); + return Result; + } + return llvm::None; +} + } // namespace std::vector findDocumentHighlights(ParsedAST &AST, Position Pos) { const SourceManager &SM = AST.getSourceManager(); // FIXME: show references to macro within file? - DeclRelationSet Relations = - DeclRelation::TemplatePattern | DeclRelation::Alias; auto CurLoc = sourceLocationInMainFile(SM, Pos); if (!CurLoc) { llvm::consumeError(CurLoc.takeError()); return {}; } - auto References = findRefs(getDeclAtPosition(AST, *CurLoc, Relations), AST); - - // FIXME: we may get multiple DocumentHighlights with the same location and - // different kinds, deduplicate them. 
   std::vector<DocumentHighlight> Result;
-  for (const auto &Ref : References) {
-    DocumentHighlight DH;
-    DH.range = Ref.range(SM);
-    if (Ref.Role & index::SymbolRoleSet(index::SymbolRole::Write))
-      DH.kind = DocumentHighlightKind::Write;
-    else if (Ref.Role & index::SymbolRoleSet(index::SymbolRole::Read))
-      DH.kind = DocumentHighlightKind::Read;
-    else
-      DH.kind = DocumentHighlightKind::Text;
-    Result.push_back(std::move(DH));
-  }
+  auto TryTree = [&](SelectionTree ST) {
+    if (const SelectionTree::Node *N = ST.commonAncestor()) {
+      DeclRelationSet Relations =
+          DeclRelation::TemplatePattern | DeclRelation::Alias;
+      auto Decls = targetDecl(N->ASTNode, Relations);
+      if (!Decls.empty()) {
+        auto Refs = findRefs({Decls.begin(), Decls.end()}, AST);
+        // FIXME: we may get multiple DocumentHighlights with the same location
+        // and different kinds, deduplicate them.
+        for (const auto &Ref : Refs)
+          Result.push_back(toHighlight(Ref, SM));
+        return true;
+      }
+      auto ControlFlow = relatedControlFlow(*N);
+      if (!ControlFlow.empty()) {
+        for (SourceLocation Loc : ControlFlow)
+          if (auto Highlight = toHighlight(Loc, AST.getTokens()))
+            Result.push_back(std::move(*Highlight));
+        return true;
+      }
+    }
+    return false;
+  };
+
+  unsigned Offset =
+      AST.getSourceManager().getDecomposedSpellingLoc(*CurLoc).second;
+  SelectionTree::createEach(AST.getASTContext(), AST.getTokens(), Offset,
+                            Offset, TryTree);
   return Result;
 }
diff --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp
index 77e863895f8032..b73a310e95fb2f 100644
--- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp
+++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp
@@ -116,6 +116,141 @@ TEST(HighlightsTest, All) {
   }
 }
 
+TEST(HighlightsTest, ControlFlow) {
+  const char *Tests[] = {
+      R"cpp(
+        // Highlight same-function returns.
+ int fib(unsigned n) { + if (n <= 1) [[ret^urn]] 1; + [[return]] fib(n - 1) + fib(n - 2); + + // Returns from other functions not highlighted. + auto Lambda = [] { return; }; + class LocalClass { void x() { return; } }; + } + )cpp", + + R"cpp( + #define FAIL() return false + #define DO(x) { x; } + bool foo(int n) { + if (n < 0) [[FAIL]](); + DO([[re^turn]] true) + } + )cpp", + + R"cpp( + // Highlight loop control flow + int magic() { + int counter = 0; + [[^for]] (char c : "fruit loops!") { + if (c == ' ') [[continue]]; + counter += c; + if (c == '!') [[break]]; + if (c == '?') [[return]] -1; + } + return counter; + } + )cpp", + + R"cpp( + // Highlight loop and same-loop control flow + void nonsense() { + [[while]] (true) { + if (false) [[bre^ak]]; + switch (1) break; + [[continue]]; + } + } + )cpp", + + R"cpp( + // Highlight switch for break (but not other breaks). + void describe(unsigned n) { + [[switch]](n) { + case 0: + break; + [[default]]: + [[^break]]; + } + } + )cpp", + + R"cpp( + // Highlight case and exits for switch-break (but not other cases). + void describe(unsigned n) { + [[switch]](n) { + case 0: + break; + [[case]] 1: + [[default]]: + [[return]]; + [[^break]]; + } + } + )cpp", + + R"cpp( + // Highlight exits and switch for case + void describe(unsigned n) { + [[switch]](n) { + case 0: + break; + [[case]] 1: + [[d^efault]]: + [[return]]; + [[break]]; + } + } + )cpp", + + R"cpp( + // Highlight nothing for switch. 
+ void describe(unsigned n) { + s^witch(n) { + case 0: + break; + case 1: + default: + return; + break; + } + } + )cpp", + + R"cpp( + // FIXME: match exception type against catch blocks + int catchy() { + try { // wrong: highlight try with matching catch + try { // correct: has no matching catch + [[thr^ow]] "oh no!"; + } catch (int) { } // correct: catch doesn't match type + [[return]] -1; // correct: exits the matching catch + } catch (const char*) { } // wrong: highlight matching catch + [[return]] 42; // wrong: throw doesn't exit function + } + )cpp", + + R"cpp( + // Loop highlights goto exiting the loop, but not jumping within it. + void jumpy() { + [[wh^ile]](1) { + up: + if (0) [[goto]] out; + goto up; + } + out: return; + } + )cpp", + }; + for (const char *Test : Tests) { + Annotations T(Test); + auto AST = TestTU::withCode(T.code()).build(); + EXPECT_THAT(findDocumentHighlights(AST, T.point()), HighlightsFrom(T)) + << Test; + } +} + MATCHER_P3(Sym, Name, Decl, DefOrNone, "") { llvm::Optional Def = DefOrNone; if (Name != arg.Name) { diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst index dcf1f28994de42..c977dde8c52ff4 100644 --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -1403,6 +1403,24 @@ Ref-counted types hold their ref-countable data by a raw pointer and allow impli struct Derived : RefCntblBase { }; // warn +.. _webkit-WebKitNoUncountedMemberChecker: + +webkit.WebKitNoUncountedMemberChecker +"""""""""""""""""""""""""""""""""""" +Raw pointers and references to uncounted types can't be used as class members. Only ref-counted types are allowed. + +.. code-block:: cpp + struct RefCntbl { + void ref() {} + void deref() {} + }; + + struct Foo { + RefCntbl * ptr; // warn + RefCntbl & ptr; // warn + // ... + }; + .. 
_alpha-checkers: Experimental Checkers diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index 0ca4941789e75a..deca0b82c4e33f 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -232,6 +232,11 @@ class Expr : public ValueStmt { /// a problem with a generic expression. SourceLocation getExprLoc() const LLVM_READONLY; + /// Determine whether an lvalue-to-rvalue conversion should implicitly be + /// applied to this expression if it appears as a discarded-value expression + /// in C++11 onwards. This applies to certain forms of volatile glvalues. + bool isReadIfDiscardedInCPlusPlus11() const; + /// isUnusedResultAWarning - Return true if this immediate expression should /// be warned about if the result is unused. If so, fill in expr, location, /// and ranges with expr to warn on and source locations/ranges appropriate diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index ef90bdf84c8abc..fa07e9ae76c853 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -175,7 +175,7 @@ def ext_unknown_escape : ExtWarn<"unknown escape sequence '\\%0'">, def err_invalid_digit : Error< "invalid digit '%0' in %select{decimal|octal|binary}1 constant">; def err_invalid_suffix_constant : Error< - "invalid suffix '%0' on %select{integer|floating}1 constant">; + "invalid suffix '%0' on %select{integer|floating|fixed-point}1 constant">; def warn_cxx11_compat_digit_separator : Warning< "digit separators are incompatible with C++ standards before C++14">, InGroup, DefaultIgnore; diff --git a/clang/include/clang/Lex/LiteralSupport.h b/clang/include/clang/Lex/LiteralSupport.h index b9d64c24a00bd1..6829771b283088 100644 --- a/clang/include/clang/Lex/LiteralSupport.h +++ b/clang/include/clang/Lex/LiteralSupport.h @@ -71,7 +71,9 @@ class NumericLiteralParser { bool isFract : 1; // 1.0hr/r/lr/uhr/ur/ulr bool isAccum : 
1; // 1.0hk/k/lk/uhk/uk/ulk - bool isFixedPointLiteral() const { return saw_fixed_point_suffix; } + bool isFixedPointLiteral() const { + return (saw_period || saw_exponent) && saw_fixed_point_suffix; + } bool isIntegerLiteral() const { return !saw_period && !saw_exponent && !isFixedPointLiteral(); diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td index 2ba3881c613517..2d69d8f3442099 100644 --- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td +++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -1623,4 +1623,8 @@ let ParentPackage = WebKit in { def RefCntblBaseVirtualDtorChecker : Checker<"RefCntblBaseVirtualDtor">, HelpText<"Check for any ref-countable base class having virtual destructor.">, Documentation; + +def WebKitNoUncountedMemberChecker : Checker<"WebKitNoUncountedMemberChecker">, + HelpText<"Check for no uncounted member variables.">, + Documentation; } // end webkit diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 4c175fff642178..feb0517204c4b3 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -2267,6 +2267,64 @@ Stmt *BlockExpr::getBody() { // Generic Expression Routines //===----------------------------------------------------------------------===// +bool Expr::isReadIfDiscardedInCPlusPlus11() const { + // In C++11, discarded-value expressions of a certain form are special, + // according to [expr]p10: + // The lvalue-to-rvalue conversion (4.1) is applied only if the + // expression is an lvalue of volatile-qualified type and it has + // one of the following forms: + if (!isGLValue() || !getType().isVolatileQualified()) + return false; + + const Expr *E = IgnoreParens(); + + // - id-expression (5.1.1), + if (isa(E)) + return true; + + // - subscripting (5.2.1), + if (isa(E)) + return true; + + // - class member access (5.2.5), + if (isa(E)) + return true; + + // - indirection (5.3.1), + if (auto *UO = dyn_cast(E)) + if 
(UO->getOpcode() == UO_Deref) + return true; + + if (auto *BO = dyn_cast(E)) { + // - pointer-to-member operation (5.5), + if (BO->isPtrMemOp()) + return true; + + // - comma expression (5.18) where the right operand is one of the above. + if (BO->getOpcode() == BO_Comma) + return BO->getRHS()->isReadIfDiscardedInCPlusPlus11(); + } + + // - conditional expression (5.16) where both the second and the third + // operands are one of the above, or + if (auto *CO = dyn_cast(E)) + return CO->getTrueExpr()->isReadIfDiscardedInCPlusPlus11() && + CO->getFalseExpr()->isReadIfDiscardedInCPlusPlus11(); + // The related edge case of "*x ?: *x". + if (auto *BCO = + dyn_cast(E)) { + if (auto *OVE = dyn_cast(BCO->getTrueExpr())) + return OVE->getSourceExpr()->isReadIfDiscardedInCPlusPlus11() && + BCO->getFalseExpr()->isReadIfDiscardedInCPlusPlus11(); + } + + // Objective-C++ extensions to the rule. + if (isa(E) || isa(E)) + return true; + + return false; +} + /// isUnusedResultAWarning - Return true if this immediate expression should /// be warned about if the result is unused. If so, fill in Loc and Ranges /// with location to warn on and the source range[s] to report with the @@ -2555,20 +2613,31 @@ bool Expr::isUnusedResultAWarning(const Expr *&WarnE, SourceLocation &Loc, } case CXXFunctionalCastExprClass: case CStyleCastExprClass: { - // Ignore an explicit cast to void unless the operand is a non-trivial - // volatile lvalue. + // Ignore an explicit cast to void, except in C++98 if the operand is a + // volatile glvalue for which we would trigger an implicit read in any + // other language mode. (Such an implicit read always happens as part of + // the lvalue conversion in C, and happens in C++ for expressions of all + // forms where it seems likely the user intended to trigger a volatile + // load.) 
const CastExpr *CE = cast(this); + const Expr *SubE = CE->getSubExpr()->IgnoreParens(); if (CE->getCastKind() == CK_ToVoid) { - if (CE->getSubExpr()->isGLValue() && - CE->getSubExpr()->getType().isVolatileQualified()) { - const DeclRefExpr *DRE = - dyn_cast(CE->getSubExpr()->IgnoreParens()); - if (!(DRE && isa(DRE->getDecl()) && - cast(DRE->getDecl())->hasLocalStorage()) && - !isa(CE->getSubExpr()->IgnoreParens())) { - return CE->getSubExpr()->isUnusedResultAWarning(WarnE, Loc, - R1, R2, Ctx); - } + if (Ctx.getLangOpts().CPlusPlus && !Ctx.getLangOpts().CPlusPlus11 && + SubE->isReadIfDiscardedInCPlusPlus11()) { + // Suppress the "unused value" warning for idiomatic usage of + // '(void)var;' used to suppress "unused variable" warnings. + if (auto *DRE = dyn_cast(SubE)) + if (auto *VD = dyn_cast(DRE->getDecl())) + if (!VD->isExternallyVisible()) + return false; + + // The lvalue-to-rvalue conversion would have no effect for an array. + // It's implausible that the programmer expected this to result in a + // volatile array load, so don't warn. 
+ if (SubE->getType()->isArrayType()) + return false; + + return SubE->isUnusedResultAWarning(WarnE, Loc, R1, R2, Ctx); } return false; } diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index 81c13a8104e8a4..ad34c287b5188a 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -151,6 +151,8 @@ void PPCTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("_ARCH_PWR8"); if (ArchDefs & ArchDefinePwr9) Builder.defineMacro("_ARCH_PWR9"); + if (ArchDefs & ArchDefinePwr10) + Builder.defineMacro("_ARCH_PWR10"); if (ArchDefs & ArchDefineA2) Builder.defineMacro("_ARCH_A2"); if (ArchDefs & ArchDefineA2q) { @@ -313,10 +315,17 @@ bool PPCTargetInfo::initFeatureMap( .Case("e500", true) .Default(false); - // Future CPU should include all of the features of Power 9 as well as any + // Power10 includes all the same features as Power9 plus any features specific + // to the Power10 core. + if (CPU == "pwr10" || CPU == "power10") { + initFeatureMap(Features, Diags, "pwr9", FeaturesVec); + addP10SpecificFeatures(Features); + } + + // Future CPU should include all of the features of Power 10 as well as any // additional features (yet to be determined) specific to it. if (CPU == "future") { - initFeatureMap(Features, Diags, "pwr9", FeaturesVec); + initFeatureMap(Features, Diags, "pwr10", FeaturesVec); addFutureSpecificFeatures(Features); } @@ -333,6 +342,13 @@ bool PPCTargetInfo::initFeatureMap( return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec); } +// Add any Power10 specific features. +void PPCTargetInfo::addP10SpecificFeatures( + llvm::StringMap &Features) const { + Features["htm"] = false; // HTM was removed for P10. + return; +} + // Add features specific to the "Future" CPU. 
void PPCTargetInfo::addFutureSpecificFeatures( llvm::StringMap &Features) const { @@ -463,18 +479,17 @@ ArrayRef PPCTargetInfo::getGCCAddlRegNames() const { } static constexpr llvm::StringLiteral ValidCPUNames[] = { - {"generic"}, {"440"}, {"450"}, {"601"}, {"602"}, - {"603"}, {"603e"}, {"603ev"}, {"604"}, {"604e"}, - {"620"}, {"630"}, {"g3"}, {"7400"}, {"g4"}, - {"7450"}, {"g4+"}, {"750"}, {"8548"}, {"970"}, - {"g5"}, {"a2"}, {"a2q"}, {"e500"}, {"e500mc"}, - {"e5500"}, {"power3"}, {"pwr3"}, {"power4"}, {"pwr4"}, - {"power5"}, {"pwr5"}, {"power5x"}, {"pwr5x"}, {"power6"}, - {"pwr6"}, {"power6x"}, {"pwr6x"}, {"power7"}, {"pwr7"}, - {"power8"}, {"pwr8"}, {"power9"}, {"pwr9"}, {"powerpc"}, - {"ppc"}, {"powerpc64"}, {"ppc64"}, {"powerpc64le"}, {"ppc64le"}, - {"future"} -}; + {"generic"}, {"440"}, {"450"}, {"601"}, {"602"}, + {"603"}, {"603e"}, {"603ev"}, {"604"}, {"604e"}, + {"620"}, {"630"}, {"g3"}, {"7400"}, {"g4"}, + {"7450"}, {"g4+"}, {"750"}, {"8548"}, {"970"}, + {"g5"}, {"a2"}, {"a2q"}, {"e500"}, {"e500mc"}, + {"e5500"}, {"power3"}, {"pwr3"}, {"power4"}, {"pwr4"}, + {"power5"}, {"pwr5"}, {"power5x"}, {"pwr5x"}, {"power6"}, + {"pwr6"}, {"power6x"}, {"pwr6x"}, {"power7"}, {"pwr7"}, + {"power8"}, {"pwr8"}, {"power9"}, {"pwr9"}, {"power10"}, + {"pwr10"}, {"powerpc"}, {"ppc"}, {"powerpc64"}, {"ppc64"}, + {"powerpc64le"}, {"ppc64le"}, {"future"}}; bool PPCTargetInfo::isValidCPUName(StringRef Name) const { return llvm::find(ValidCPUNames, Name) != std::end(ValidCPUNames); diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h index 7c19a96a99c748..691fa5fdcc6d10 100644 --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -43,13 +43,13 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { ArchDefinePwr7 = 1 << 11, ArchDefinePwr8 = 1 << 12, ArchDefinePwr9 = 1 << 13, - ArchDefineFuture = 1 << 14, - ArchDefineA2 = 1 << 15, - ArchDefineA2q = 1 << 16, - ArchDefineE500 = 1 << 17 + ArchDefinePwr10 = 1 << 14, + 
ArchDefineFuture = 1 << 15, + ArchDefineA2 = 1 << 16, + ArchDefineA2q = 1 << 17, + ArchDefineE500 = 1 << 18 } ArchDefineTypes; - ArchDefineTypes ArchDefs = ArchDefineNone; static const Builtin::Info BuiltinInfo[]; static const char *const GCCRegNames[]; @@ -119,20 +119,20 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { .Case("a2q", ArchDefineName | ArchDefineA2 | ArchDefineA2q) .Cases("power3", "pwr3", ArchDefinePpcgr) .Cases("power4", "pwr4", - ArchDefinePwr4 | ArchDefinePpcgr | ArchDefinePpcsq) + ArchDefinePwr4 | ArchDefinePpcgr | ArchDefinePpcsq) .Cases("power5", "pwr5", - ArchDefinePwr5 | ArchDefinePwr4 | ArchDefinePpcgr | - ArchDefinePpcsq) + ArchDefinePwr5 | ArchDefinePwr4 | ArchDefinePpcgr | + ArchDefinePpcsq) .Cases("power5x", "pwr5x", - ArchDefinePwr5x | ArchDefinePwr5 | ArchDefinePwr4 | - ArchDefinePpcgr | ArchDefinePpcsq) + ArchDefinePwr5x | ArchDefinePwr5 | ArchDefinePwr4 | + ArchDefinePpcgr | ArchDefinePpcsq) .Cases("power6", "pwr6", - ArchDefinePwr6 | ArchDefinePwr5x | ArchDefinePwr5 | - ArchDefinePwr4 | ArchDefinePpcgr | ArchDefinePpcsq) + ArchDefinePwr6 | ArchDefinePwr5x | ArchDefinePwr5 | + ArchDefinePwr4 | ArchDefinePpcgr | ArchDefinePpcsq) .Cases("power6x", "pwr6x", - ArchDefinePwr6x | ArchDefinePwr6 | ArchDefinePwr5x | - ArchDefinePwr5 | ArchDefinePwr4 | ArchDefinePpcgr | - ArchDefinePpcsq) + ArchDefinePwr6x | ArchDefinePwr6 | ArchDefinePwr5x | + ArchDefinePwr5 | ArchDefinePwr4 | ArchDefinePpcgr | + ArchDefinePpcsq) .Cases("power7", "pwr7", ArchDefinePwr7 | ArchDefinePwr6 | ArchDefinePwr5x | ArchDefinePwr5 | ArchDefinePwr4 | ArchDefinePpcgr | @@ -146,11 +146,16 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { ArchDefinePwr9 | ArchDefinePwr8 | ArchDefinePwr7 | ArchDefinePwr6 | ArchDefinePwr5x | ArchDefinePwr5 | ArchDefinePwr4 | ArchDefinePpcgr | ArchDefinePpcsq) + .Cases("power10", "pwr10", + ArchDefinePwr10 | ArchDefinePwr9 | ArchDefinePwr8 | + ArchDefinePwr7 | ArchDefinePwr6 | ArchDefinePwr5x | + 
ArchDefinePwr5 | ArchDefinePwr4 | ArchDefinePpcgr | + ArchDefinePpcsq) .Case("future", - ArchDefineFuture | ArchDefinePwr9 | ArchDefinePwr8 | - ArchDefinePwr7 | ArchDefinePwr6 | ArchDefinePwr5x | - ArchDefinePwr5 | ArchDefinePwr4 | ArchDefinePpcgr | - ArchDefinePpcsq) + ArchDefineFuture | ArchDefinePwr10 | ArchDefinePwr9 | + ArchDefinePwr8 | ArchDefinePwr7 | ArchDefinePwr6 | + ArchDefinePwr5x | ArchDefinePwr5 | ArchDefinePwr4 | + ArchDefinePpcgr | ArchDefinePpcsq) .Cases("8548", "e500", ArchDefineE500) .Default(ArchDefineNone); } @@ -171,6 +176,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { StringRef CPU, const std::vector &FeaturesVec) const override; + void addP10SpecificFeatures(llvm::StringMap &Features) const; void addFutureSpecificFeatures(llvm::StringMap &Features) const; bool handleTargetFeatures(std::vector &Features, diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 7ec792ca0e1f49..31f8df2430176e 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -236,6 +236,10 @@ PrintingPolicy CGDebugInfo::getPrintingPolicy() const { if (CGM.getCodeGenOpts().EmitCodeView) { PP.MSVCFormatting = true; PP.SplitTemplateClosers = true; + } else { + // For DWARF, printing rules are underspecified. + // SplitTemplateClosers yields better interop with GCC and GDB (PR46052). + PP.SplitTemplateClosers = true; } // Apply -fdebug-prefix-map. @@ -2732,9 +2736,17 @@ llvm::DIType *CGDebugInfo::CreateType(const VectorType *Ty, QualType QTy(Ty, 0); auto SizeExpr = SizeExprCache.find(QTy); if (SizeExpr != SizeExprCache.end()) - Subscript = DBuilder.getOrCreateSubrange(0, SizeExpr->getSecond()); - else - Subscript = DBuilder.getOrCreateSubrange(0, Count ? 
Count : -1); + Subscript = DBuilder.getOrCreateSubrange( + SizeExpr->getSecond() /*count*/, nullptr /*lowerBound*/, + nullptr /*upperBound*/, nullptr /*stride*/); + else { + auto *CountNode = + llvm::ConstantAsMetadata::get(llvm::ConstantInt::getSigned( + llvm::Type::getInt64Ty(CGM.getLLVMContext()), Count ? Count : -1)); + Subscript = DBuilder.getOrCreateSubrange( + CountNode /*count*/, nullptr /*lowerBound*/, nullptr /*upperBound*/, + nullptr /*stride*/); + } llvm::DINodeArray SubscriptArray = DBuilder.getOrCreateArray(Subscript); uint64_t Size = CGM.getContext().getTypeSize(Ty); @@ -2754,8 +2766,18 @@ llvm::DIType *CGDebugInfo::CreateType(const ConstantMatrixType *Ty, // Create ranges for both dimensions. llvm::SmallVector Subscripts; - Subscripts.push_back(DBuilder.getOrCreateSubrange(0, Ty->getNumColumns())); - Subscripts.push_back(DBuilder.getOrCreateSubrange(0, Ty->getNumRows())); + auto *ColumnCountNode = + llvm::ConstantAsMetadata::get(llvm::ConstantInt::getSigned( + llvm::Type::getInt64Ty(CGM.getLLVMContext()), Ty->getNumColumns())); + auto *RowCountNode = + llvm::ConstantAsMetadata::get(llvm::ConstantInt::getSigned( + llvm::Type::getInt64Ty(CGM.getLLVMContext()), Ty->getNumRows())); + Subscripts.push_back(DBuilder.getOrCreateSubrange( + ColumnCountNode /*count*/, nullptr /*lowerBound*/, nullptr /*upperBound*/, + nullptr /*stride*/)); + Subscripts.push_back(DBuilder.getOrCreateSubrange( + RowCountNode /*count*/, nullptr /*lowerBound*/, nullptr /*upperBound*/, + nullptr /*stride*/)); llvm::DINodeArray SubscriptArray = DBuilder.getOrCreateArray(Subscripts); return DBuilder.createArrayType(Size, Align, ElementTy, SubscriptArray); } @@ -2810,10 +2832,17 @@ llvm::DIType *CGDebugInfo::CreateType(const ArrayType *Ty, llvm::DIFile *Unit) { auto SizeNode = SizeExprCache.find(EltTy); if (SizeNode != SizeExprCache.end()) - Subscripts.push_back( - DBuilder.getOrCreateSubrange(0, SizeNode->getSecond())); - else - Subscripts.push_back(DBuilder.getOrCreateSubrange(0, 
Count)); + Subscripts.push_back(DBuilder.getOrCreateSubrange( + SizeNode->getSecond() /*count*/, nullptr /*lowerBound*/, + nullptr /*upperBound*/, nullptr /*stride*/)); + else { + auto *CountNode = + llvm::ConstantAsMetadata::get(llvm::ConstantInt::getSigned( + llvm::Type::getInt64Ty(CGM.getLLVMContext()), Count)); + Subscripts.push_back(DBuilder.getOrCreateSubrange( + CountNode /*count*/, nullptr /*lowerBound*/, nullptr /*upperBound*/, + nullptr /*stride*/)); + } EltTy = Ty->getElementType(); } diff --git a/clang/lib/Driver/ToolChains/Arch/PPC.cpp b/clang/lib/Driver/ToolChains/Arch/PPC.cpp index e5130a9485de77..144e276a6bd872 100644 --- a/clang/lib/Driver/ToolChains/Arch/PPC.cpp +++ b/clang/lib/Driver/ToolChains/Arch/PPC.cpp @@ -70,6 +70,7 @@ std::string ppc::getPPCTargetCPU(const ArgList &Args) { .Case("power7", "pwr7") .Case("power8", "pwr8") .Case("power9", "pwr9") + .Case("power10", "pwr10") .Case("future", "future") .Case("pwr3", "pwr3") .Case("pwr4", "pwr4") @@ -80,6 +81,7 @@ std::string ppc::getPPCTargetCPU(const ArgList &Args) { .Case("pwr7", "pwr7") .Case("pwr8", "pwr8") .Case("pwr9", "pwr9") + .Case("pwr10", "pwr10") .Case("powerpc", "ppc") .Case("powerpc64", "ppc64") .Case("powerpc64le", "ppc64le") @@ -91,14 +93,16 @@ std::string ppc::getPPCTargetCPU(const ArgList &Args) { const char *ppc::getPPCAsmModeForCPU(StringRef Name) { return llvm::StringSwitch(Name) - .Case("pwr7", "-mpower7") - .Case("power7", "-mpower7") - .Case("pwr8", "-mpower8") - .Case("power8", "-mpower8") - .Case("ppc64le", "-mpower8") - .Case("pwr9", "-mpower9") - .Case("power9", "-mpower9") - .Default("-many"); + .Case("pwr7", "-mpower7") + .Case("power7", "-mpower7") + .Case("pwr8", "-mpower8") + .Case("power8", "-mpower8") + .Case("ppc64le", "-mpower8") + .Case("pwr9", "-mpower9") + .Case("power9", "-mpower9") + .Case("pwr10", "-mpower10") + .Case("power10", "-mpower10") + .Default("-many"); } void ppc::getPPCTargetFeatures(const Driver &D, const llvm::Triple &Triple, diff --git 
a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index f33983db3e1eb0..dd83cafb274876 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2045,57 +2045,10 @@ void Clang::AddSystemZTargetArgs(const ArgList &Args, } } -static void addX86AlignBranchArgs(const Driver &D, const ArgList &Args, - ArgStringList &CmdArgs) { - if (Args.hasArg(options::OPT_mbranches_within_32B_boundaries)) { - CmdArgs.push_back("-mllvm"); - CmdArgs.push_back("-x86-branches-within-32B-boundaries"); - } - if (const Arg *A = Args.getLastArg(options::OPT_malign_branch_boundary_EQ)) { - StringRef Value = A->getValue(); - unsigned Boundary; - if (Value.getAsInteger(10, Boundary) || Boundary < 16 || - !llvm::isPowerOf2_64(Boundary)) { - D.Diag(diag::err_drv_invalid_argument_to_option) - << Value << A->getOption().getName(); - } else { - CmdArgs.push_back("-mllvm"); - CmdArgs.push_back( - Args.MakeArgString("-x86-align-branch-boundary=" + Twine(Boundary))); - } - } - if (const Arg *A = Args.getLastArg(options::OPT_malign_branch_EQ)) { - std::string AlignBranch; - for (StringRef T : A->getValues()) { - if (T != "fused" && T != "jcc" && T != "jmp" && T != "call" && - T != "ret" && T != "indirect") - D.Diag(diag::err_drv_invalid_malign_branch_EQ) - << T << "fused, jcc, jmp, call, ret, indirect"; - if (!AlignBranch.empty()) - AlignBranch += '+'; - AlignBranch += T; - } - CmdArgs.push_back("-mllvm"); - CmdArgs.push_back(Args.MakeArgString("-x86-align-branch=" + AlignBranch)); - } - if (const Arg *A = Args.getLastArg(options::OPT_mpad_max_prefix_size_EQ)) { - StringRef Value = A->getValue(); - unsigned PrefixSize; - if (Value.getAsInteger(10, PrefixSize)) { - D.Diag(diag::err_drv_invalid_argument_to_option) - << Value << A->getOption().getName(); - } else { - CmdArgs.push_back("-mllvm"); - CmdArgs.push_back( - Args.MakeArgString("-x86-pad-max-prefix-size=" + Twine(PrefixSize))); - } - } -} - void Clang::AddX86TargetArgs(const 
ArgList &Args, ArgStringList &CmdArgs) const { const Driver &D = getToolChain().getDriver(); - addX86AlignBranchArgs(D, Args, CmdArgs); + addX86AlignBranchArgs(D, Args, CmdArgs, /*IsLTO=*/false); if (!Args.hasFlag(options::OPT_mred_zone, options::OPT_mno_red_zone, true) || Args.hasArg(options::OPT_mkernel) || @@ -6745,7 +6698,8 @@ void ClangAs::AddMIPSTargetArgs(const ArgList &Args, void ClangAs::AddX86TargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { - addX86AlignBranchArgs(getToolChain().getDriver(), Args, CmdArgs); + addX86AlignBranchArgs(getToolChain().getDriver(), Args, CmdArgs, + /*IsLTO=*/false); if (Arg *A = Args.getLastArg(options::OPT_masm_EQ)) { StringRef Value = A->getValue(); diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 85a1a4e1ac07d5..33c43222b5f9df 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -358,6 +358,7 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, ArgStringList &CmdArgs, const InputInfo &Output, const InputInfo &Input, bool IsThinLTO) { const char *Linker = Args.MakeArgString(ToolChain.GetLinkerPath()); + const Driver &D = ToolChain.getDriver(); if (llvm::sys::path::filename(Linker) != "ld.lld" && llvm::sys::path::stem(Linker) != "ld.lld") { // Tell the linker to load the plugin. 
This has to come before @@ -374,10 +375,9 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, #endif SmallString<1024> Plugin; - llvm::sys::path::native(Twine(ToolChain.getDriver().Dir) + - "/../lib" CLANG_LIBDIR_SUFFIX "/LLVMgold" + - Suffix, - Plugin); + llvm::sys::path::native( + Twine(D.Dir) + "/../lib" CLANG_LIBDIR_SUFFIX "/LLVMgold" + Suffix, + Plugin); CmdArgs.push_back(Args.MakeArgString(Plugin)); } @@ -417,7 +417,7 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, if (IsThinLTO) CmdArgs.push_back("-plugin-opt=thinlto"); - StringRef Parallelism = getLTOParallelism(Args, ToolChain.getDriver()); + StringRef Parallelism = getLTOParallelism(Args, D); if (!Parallelism.empty()) CmdArgs.push_back( Args.MakeArgString("-plugin-opt=jobs=" + Twine(Parallelism))); @@ -449,7 +449,7 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, if (Arg *A = getLastProfileSampleUseArg(Args)) { StringRef FName = A->getValue(); if (!llvm::sys::fs::exists(FName)) - ToolChain.getDriver().Diag(diag::err_drv_no_such_file) << FName; + D.Diag(diag::err_drv_no_such_file) << FName; else CmdArgs.push_back( Args.MakeArgString(Twine("-plugin-opt=sample-profile=") + FName)); @@ -492,11 +492,12 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, } // Setup statistics file output. - SmallString<128> StatsFile = - getStatsFileName(Args, Output, Input, ToolChain.getDriver()); + SmallString<128> StatsFile = getStatsFileName(Args, Output, Input, D); if (!StatsFile.empty()) CmdArgs.push_back( Args.MakeArgString(Twine("-plugin-opt=stats-file=") + StatsFile)); + + addX86AlignBranchArgs(D, Args, CmdArgs, /*IsLTO=*/true); } void tools::addArchSpecificRPath(const ToolChain &TC, const ArgList &Args, @@ -1423,3 +1424,53 @@ void tools::addMultilibFlag(bool Enabled, const char *const Flag, Multilib::flags_list &Flags) { Flags.push_back(std::string(Enabled ? 
"+" : "-") + Flag); } + +void tools::addX86AlignBranchArgs(const Driver &D, const ArgList &Args, + ArgStringList &CmdArgs, bool IsLTO) { + auto addArg = [&, IsLTO](const Twine &Arg) { + if (IsLTO) { + CmdArgs.push_back(Args.MakeArgString("-plugin-opt=" + Arg)); + } else { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back(Args.MakeArgString(Arg)); + } + }; + + if (Args.hasArg(options::OPT_mbranches_within_32B_boundaries)) { + addArg(Twine("-x86-branches-within-32B-boundaries")); + } + if (const Arg *A = Args.getLastArg(options::OPT_malign_branch_boundary_EQ)) { + StringRef Value = A->getValue(); + unsigned Boundary; + if (Value.getAsInteger(10, Boundary) || Boundary < 16 || + !llvm::isPowerOf2_64(Boundary)) { + D.Diag(diag::err_drv_invalid_argument_to_option) + << Value << A->getOption().getName(); + } else { + addArg("-x86-align-branch-boundary=" + Twine(Boundary)); + } + } + if (const Arg *A = Args.getLastArg(options::OPT_malign_branch_EQ)) { + std::string AlignBranch; + for (StringRef T : A->getValues()) { + if (T != "fused" && T != "jcc" && T != "jmp" && T != "call" && + T != "ret" && T != "indirect") + D.Diag(diag::err_drv_invalid_malign_branch_EQ) + << T << "fused, jcc, jmp, call, ret, indirect"; + if (!AlignBranch.empty()) + AlignBranch += '+'; + AlignBranch += T; + } + addArg("-x86-align-branch=" + Twine(AlignBranch)); + } + if (const Arg *A = Args.getLastArg(options::OPT_mpad_max_prefix_size_EQ)) { + StringRef Value = A->getValue(); + unsigned PrefixSize; + if (Value.getAsInteger(10, PrefixSize)) { + D.Diag(diag::err_drv_invalid_argument_to_option) + << Value << A->getOption().getName(); + } else { + addArg("-x86-pad-max-prefix-size=" + Twine(PrefixSize)); + } + } +} diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h index c94b2b828c9b83..58bc92c9b7569a 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.h +++ b/clang/lib/Driver/ToolChains/CommonArgs.h @@ -127,6 +127,8 @@ SmallString<128> getStatsFileName(const 
llvm::opt::ArgList &Args, void addMultilibFlag(bool Enabled, const char *const Flag, Multilib::flags_list &Flags); +void addX86AlignBranchArgs(const Driver &D, const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs, bool IsLTO); } // end namespace tools } // end namespace driver } // end namespace clang diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp index 2b1add4d9b9870..f44614b4bec466 100644 --- a/clang/lib/Lex/LiteralSupport.cpp +++ b/clang/lib/Lex/LiteralSupport.cpp @@ -583,6 +583,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, // Parse the suffix. At this point we can classify whether we have an FP or // integer constant. + bool isFixedPointConstant = isFixedPointLiteral(); bool isFPConstant = isFloatingLiteral(); // Loop over all of the characters of the suffix. If we see something bad, @@ -737,7 +738,8 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, // Report an error if there are any. PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, SuffixBegin - ThisTokBegin), diag::err_invalid_suffix_constant) - << StringRef(SuffixBegin, ThisTokEnd - SuffixBegin) << isFPConstant; + << StringRef(SuffixBegin, ThisTokEnd - SuffixBegin) + << (isFixedPointConstant ? 2 : isFPConstant); hadError = true; } } diff --git a/clang/lib/Sema/DeclSpec.cpp b/clang/lib/Sema/DeclSpec.cpp index 276e35a3497e62..834e2533342d47 100644 --- a/clang/lib/Sema/DeclSpec.cpp +++ b/clang/lib/Sema/DeclSpec.cpp @@ -1269,7 +1269,8 @@ void DeclSpec::Finish(Sema &S, const PrintingPolicy &Policy) { // Note that this intentionally doesn't include _Complex _Bool. 
if (!S.getLangOpts().CPlusPlus) S.Diag(TSTLoc, diag::ext_integer_complex); - } else if (TypeSpecType != TST_float && TypeSpecType != TST_double) { + } else if (TypeSpecType != TST_float && TypeSpecType != TST_double && + TypeSpecType != TST_float128) { S.Diag(TSCLoc, diag::err_invalid_complex_spec) << getSpecifierName((TST)TypeSpecType, Policy); TypeSpecComplex = TSC_unspecified; diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 7cda60ba75984c..b655c828168960 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -7660,61 +7660,6 @@ ExprResult Sema::ActOnNoexceptExpr(SourceLocation KeyLoc, SourceLocation, return BuildCXXNoexceptExpr(KeyLoc, Operand, RParen); } -static bool IsSpecialDiscardedValue(Expr *E) { - // In C++11, discarded-value expressions of a certain form are special, - // according to [expr]p10: - // The lvalue-to-rvalue conversion (4.1) is applied only if the - // expression is an lvalue of volatile-qualified type and it has - // one of the following forms: - E = E->IgnoreParens(); - - // - id-expression (5.1.1), - if (isa(E)) - return true; - - // - subscripting (5.2.1), - if (isa(E)) - return true; - - // - class member access (5.2.5), - if (isa(E)) - return true; - - // - indirection (5.3.1), - if (UnaryOperator *UO = dyn_cast(E)) - if (UO->getOpcode() == UO_Deref) - return true; - - if (BinaryOperator *BO = dyn_cast(E)) { - // - pointer-to-member operation (5.5), - if (BO->isPtrMemOp()) - return true; - - // - comma expression (5.18) where the right operand is one of the above. - if (BO->getOpcode() == BO_Comma) - return IsSpecialDiscardedValue(BO->getRHS()); - } - - // - conditional expression (5.16) where both the second and the third - // operands are one of the above, or - if (ConditionalOperator *CO = dyn_cast(E)) - return IsSpecialDiscardedValue(CO->getTrueExpr()) && - IsSpecialDiscardedValue(CO->getFalseExpr()); - // The related edge case of "*x ?: *x". 
- if (BinaryConditionalOperator *BCO = - dyn_cast(E)) { - if (OpaqueValueExpr *OVE = dyn_cast(BCO->getTrueExpr())) - return IsSpecialDiscardedValue(OVE->getSourceExpr()) && - IsSpecialDiscardedValue(BCO->getFalseExpr()); - } - - // Objective-C++ extensions to the rule. - if (isa(E) || isa(E)) - return true; - - return false; -} - /// Perform the conversions required for an expression used in a /// context that ignores the result. ExprResult Sema::IgnoredValueConversions(Expr *E) { @@ -7739,23 +7684,20 @@ ExprResult Sema::IgnoredValueConversions(Expr *E) { return E; } - if (getLangOpts().CPlusPlus) { + if (getLangOpts().CPlusPlus) { // The C++11 standard defines the notion of a discarded-value expression; // normally, we don't need to do anything to handle it, but if it is a // volatile lvalue with a special form, we perform an lvalue-to-rvalue // conversion. - if (getLangOpts().CPlusPlus11 && E->isGLValue() && - E->getType().isVolatileQualified()) { - if (IsSpecialDiscardedValue(E)) { - ExprResult Res = DefaultLvalueConversion(E); - if (Res.isInvalid()) - return E; - E = Res.get(); - } else { - // Per C++2a [expr.ass]p5, a volatile assignment is not deprecated if - // it occurs as a discarded-value expression. - CheckUnusedVolatileAssignment(E); - } + if (getLangOpts().CPlusPlus11 && E->isReadIfDiscardedInCPlusPlus11()) { + ExprResult Res = DefaultLvalueConversion(E); + if (Res.isInvalid()) + return E; + E = Res.get(); + } else { + // Per C++2a [expr.ass]p5, a volatile assignment is not deprecated if + // it occurs as a discarded-value expression. 
+ CheckUnusedVolatileAssignment(E); } // C++1z: diff --git a/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt b/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt index 4f885fadf4158d..b3dc7a9f632124 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt +++ b/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt @@ -121,6 +121,7 @@ add_clang_library(clangStaticAnalyzerCheckers VLASizeChecker.cpp ValistChecker.cpp VirtualCallChecker.cpp + WebKit/NoUncountedMembersChecker.cpp WebKit/PtrTypesSemantics.cpp WebKit/RefCntblBaseVirtualDtorChecker.cpp diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp index 2e90be4350a037..63ebfaf90dc82c 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp @@ -92,7 +92,27 @@ struct StreamState { /// State of the error flags. /// Ignored in non-opened stream state but must be NoError. - StreamErrorState ErrorState; + StreamErrorState const ErrorState; + + /// Indicate if the file has an "indeterminate file position indicator". + /// This can be set at a failing read or write or seek operation. + /// If it is set no more read or write is allowed. + /// This value is not dependent on the stream error flags: + /// The error flag may be cleared with `clearerr` but the file position + /// remains still indeterminate. + /// This value applies to all error states in ErrorState except FEOF. + /// An EOF+indeterminate state is the same as EOF state. 
+ bool const FilePositionIndeterminate = false; + + StreamState(const FnDescription *L, KindTy S, const StreamErrorState &ES, + bool IsFilePositionIndeterminate) + : LastOperation(L), State(S), ErrorState(ES), + FilePositionIndeterminate(IsFilePositionIndeterminate) { + assert((!ES.isFEof() || !IsFilePositionIndeterminate) && + "FilePositionIndeterminate should be false in FEof case."); + assert((State == Opened || ErrorState.isNoError()) && + "ErrorState should be None in non-opened stream state."); + } bool isOpened() const { return State == Opened; } bool isClosed() const { return State == Closed; } @@ -102,24 +122,27 @@ struct StreamState { // In not opened state error state should always NoError, so comparison // here is no problem. return LastOperation == X.LastOperation && State == X.State && - ErrorState == X.ErrorState; + ErrorState == X.ErrorState && + FilePositionIndeterminate == X.FilePositionIndeterminate; } static StreamState getOpened(const FnDescription *L, - const StreamErrorState &ES = {}) { - return StreamState{L, Opened, ES}; + const StreamErrorState &ES = ErrorNone, + bool IsFilePositionIndeterminate = false) { + return StreamState{L, Opened, ES, IsFilePositionIndeterminate}; } static StreamState getClosed(const FnDescription *L) { - return StreamState{L, Closed, {}}; + return StreamState{L, Closed, {}, false}; } static StreamState getOpenFailed(const FnDescription *L) { - return StreamState{L, OpenFailed, {}}; + return StreamState{L, OpenFailed, {}, false}; } void Profile(llvm::FoldingSetNodeID &ID) const { ID.AddPointer(LastOperation); ID.AddInteger(State); ID.AddInteger(ErrorState); + ID.AddBoolean(FilePositionIndeterminate); } }; @@ -173,7 +196,8 @@ ProgramStateRef bindInt(uint64_t Value, ProgramStateRef State, class StreamChecker : public Checker { mutable std::unique_ptr BT_nullfp, BT_illegalwhence, - BT_UseAfterClose, BT_UseAfterOpenFailed, BT_ResourceLeak, BT_StreamEof; + BT_UseAfterClose, BT_UseAfterOpenFailed, BT_ResourceLeak, 
BT_StreamEof, + BT_IndeterminatePosition; public: void checkPreCall(const CallEvent &Call, CheckerContext &C) const; @@ -279,6 +303,16 @@ class StreamChecker ProgramStateRef ensureStreamOpened(SVal StreamVal, CheckerContext &C, ProgramStateRef State) const; + /// Check that the stream has not an invalid ("indeterminate") file position, + /// generate warning for it. + /// (EOF is not an invalid position.) + /// The returned state can be nullptr if a fatal error was generated. + /// It can return non-null state if the stream has not an invalid position or + /// there is execution path with non-invalid position. + ProgramStateRef + ensureNoFilePositionIndeterminate(SVal StreamVal, CheckerContext &C, + ProgramStateRef State) const; + /// Check the legality of the 'whence' argument of 'fseek'. /// Generate error and return nullptr if it is found to be illegal. /// Otherwise returns the state. @@ -447,6 +481,9 @@ void StreamChecker::preFread(const FnDescription *Desc, const CallEvent &Call, if (!State) return; State = ensureStreamOpened(StreamVal, C, State); + if (!State) + return; + State = ensureNoFilePositionIndeterminate(StreamVal, C, State); if (!State) return; @@ -468,6 +505,9 @@ void StreamChecker::preFwrite(const FnDescription *Desc, const CallEvent &Call, if (!State) return; State = ensureStreamOpened(StreamVal, C, State); + if (!State) + return; + State = ensureNoFilePositionIndeterminate(StreamVal, C, State); if (!State) return; @@ -548,7 +588,9 @@ void StreamChecker::evalFreadFwrite(const FnDescription *Desc, NewES = (SS->ErrorState == ErrorFEof) ? ErrorFEof : ErrorFEof | ErrorFError; else NewES = ErrorFError; - StreamState NewState = StreamState::getOpened(Desc, NewES); + // If a (non-EOF) error occurs, the resulting value of the file position + // indicator for the stream is indeterminate. 
+ StreamState NewState = StreamState::getOpened(Desc, NewES, !NewES.isFEof()); StateFailed = StateFailed->set(StreamSym, NewState); C.addTransition(StateFailed); } @@ -601,9 +643,11 @@ void StreamChecker::evalFseek(const FnDescription *Desc, const CallEvent &Call, StateNotFailed->set(StreamSym, StreamState::getOpened(Desc)); // We get error. // It is possible that fseek fails but sets none of the error flags. + // If fseek failed, assume that the file position becomes indeterminate in any + // case. StateFailed = StateFailed->set( StreamSym, - StreamState::getOpened(Desc, ErrorNone | ErrorFEof | ErrorFError)); + StreamState::getOpened(Desc, ErrorNone | ErrorFEof | ErrorFError, true)); C.addTransition(StateNotFailed); C.addTransition(StateFailed); @@ -623,7 +667,10 @@ void StreamChecker::evalClearerr(const FnDescription *Desc, assertStreamStateOpened(SS); - State = State->set(StreamSym, StreamState::getOpened(Desc)); + // FilePositionIndeterminate is not cleared. + State = State->set( + StreamSym, + StreamState::getOpened(Desc, ErrorNone, SS->FilePositionIndeterminate)); C.addTransition(State); } @@ -651,7 +698,9 @@ void StreamChecker::evalFeofFerror(const FnDescription *Desc, // From now on it is the only one error state. ProgramStateRef TrueState = bindAndAssumeTrue(State, C, CE); C.addTransition(TrueState->set( - StreamSym, StreamState::getOpened(Desc, ErrorKind))); + StreamSym, StreamState::getOpened(Desc, ErrorKind, + SS->FilePositionIndeterminate && + !ErrorKind.isFEof()))); } if (StreamErrorState NewES = SS->ErrorState & (~ErrorKind)) { // Execution path(s) with ErrorKind not set. @@ -659,7 +708,9 @@ void StreamChecker::evalFeofFerror(const FnDescription *Desc, // New error state is everything before minus ErrorKind. 
ProgramStateRef FalseState = bindInt(0, State, C, CE); C.addTransition(FalseState->set( - StreamSym, StreamState::getOpened(Desc, NewES))); + StreamSym, + StreamState::getOpened( + Desc, NewES, SS->FilePositionIndeterminate && !NewES.isFEof()))); } } @@ -767,6 +818,55 @@ ProgramStateRef StreamChecker::ensureStreamOpened(SVal StreamVal, return State; } +ProgramStateRef StreamChecker::ensureNoFilePositionIndeterminate( + SVal StreamVal, CheckerContext &C, ProgramStateRef State) const { + SymbolRef Sym = StreamVal.getAsSymbol(); + if (!Sym) + return State; + + const StreamState *SS = State->get(Sym); + if (!SS) + return State; + + assert(SS->isOpened() && "First ensure that stream is opened."); + + if (SS->FilePositionIndeterminate) { + if (!BT_IndeterminatePosition) + BT_IndeterminatePosition.reset( + new BuiltinBug(this, "Invalid stream state", + "File position of the stream might be 'indeterminate' " + "after a failed operation. " + "Can cause undefined behavior.")); + + if (SS->ErrorState & ErrorFEof) { + // The error is unknown but may be FEOF. + // Continue analysis with the FEOF error state. + // Report warning because of the other possible error states. + ExplodedNode *N = C.generateNonFatalErrorNode(State); + if (!N) + return nullptr; + + C.emitReport(std::make_unique( + *BT_IndeterminatePosition, BT_IndeterminatePosition->getDescription(), + N)); + return State->set( + Sym, StreamState::getOpened(SS->LastOperation, ErrorFEof, false)); + } + + // Known or unknown error state without FEOF possible. + // Stop analysis, report error. 
+ ExplodedNode *N = C.generateErrorNode(State); + if (N) + C.emitReport(std::make_unique( + *BT_IndeterminatePosition, BT_IndeterminatePosition->getDescription(), + N)); + + return nullptr; + } + + return State; +} + ProgramStateRef StreamChecker::ensureFseekWhenceCorrect(SVal WhenceVal, CheckerContext &C, ProgramStateRef State) const { diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/DiagOutputUtils.h b/clang/lib/StaticAnalyzer/Checkers/WebKit/DiagOutputUtils.h index 4979b8ffc2b20c..781a8d746001fb 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/DiagOutputUtils.h +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/DiagOutputUtils.h @@ -23,6 +23,14 @@ void printQuotedQualifiedName(llvm::raw_ostream &Os, Os << "'"; } +template +void printQuotedName(llvm::raw_ostream &Os, const NamedDeclDerivedT &D) { + Os << "'"; + D->getNameForDiagnostic(Os, D->getASTContext().getPrintingPolicy(), + /*Qualified=*/false); + Os << "'"; +} + } // namespace clang #endif diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/NoUncountedMembersChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/NoUncountedMembersChecker.cpp new file mode 100644 index 00000000000000..89caf602a17e59 --- /dev/null +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/NoUncountedMembersChecker.cpp @@ -0,0 +1,150 @@ +//=======- NoUncountedMembersChecker.cpp -------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ASTUtils.h" +#include "DiagOutputUtils.h" +#include "PtrTypesSemantics.h" +#include "clang/AST/CXXInheritance.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Support/Casting.h" + +using namespace clang; +using namespace ento; + +namespace { + +class NoUncountedMemberChecker + : public Checker> { +private: + BugType Bug; + mutable BugReporter *BR; + +public: + NoUncountedMemberChecker() + : Bug(this, + "Member variable is a raw-pointer/reference to reference-countable " + "type", + "WebKit coding guidelines") {} + + void checkASTDecl(const TranslationUnitDecl *TUD, AnalysisManager &MGR, + BugReporter &BRArg) const { + BR = &BRArg; + + // The calls to checkAST* from AnalysisConsumer don't + // visit template instantiations or lambda classes. We + // want to visit those, so we make our own RecursiveASTVisitor. 
+ struct LocalVisitor : public RecursiveASTVisitor { + const NoUncountedMemberChecker *Checker; + explicit LocalVisitor(const NoUncountedMemberChecker *Checker) + : Checker(Checker) { + assert(Checker); + } + + bool shouldVisitTemplateInstantiations() const { return true; } + bool shouldVisitImplicitCode() const { return false; } + + bool VisitRecordDecl(const RecordDecl *RD) { + Checker->visitRecordDecl(RD); + return true; + } + }; + + LocalVisitor visitor(this); + visitor.TraverseDecl(const_cast(TUD)); + } + + void visitRecordDecl(const RecordDecl *RD) const { + if (shouldSkipDecl(RD)) + return; + + for (auto Member : RD->fields()) { + const Type *MemberType = Member->getType().getTypePtrOrNull(); + if (!MemberType) + continue; + + if (auto *MemberCXXRD = MemberType->getPointeeCXXRecordDecl()) { + if (isRefCountable(MemberCXXRD)) + reportBug(Member, MemberType, MemberCXXRD, RD); + } + } + } + + bool shouldSkipDecl(const RecordDecl *RD) const { + if (!RD->isThisDeclarationADefinition()) + return true; + + if (RD->isImplicit()) + return true; + + if (RD->isLambda()) + return true; + + // If the construct doesn't have a source file, then it's not something + // we want to diagnose. + const auto RDLocation = RD->getLocation(); + if (!RDLocation.isValid()) + return true; + + const auto Kind = RD->getTagKind(); + // FIXME: Should we check union members too? + if (Kind != TTK_Struct && Kind != TTK_Class) + return true; + + // Ignore CXXRecords that come from system headers. + if (BR->getSourceManager().isInSystemHeader(RDLocation)) + return true; + + // Ref-counted smartpointers actually have raw-pointer to uncounted type as + // a member but we trust them to handle it correctly. 
+ return isRefCounted(llvm::dyn_cast_or_null(RD)); + } + + void reportBug(const FieldDecl *Member, const Type *MemberType, + const CXXRecordDecl *MemberCXXRD, + const RecordDecl *ClassCXXRD) const { + assert(Member); + assert(MemberType); + assert(MemberCXXRD); + + SmallString<100> Buf; + llvm::raw_svector_ostream Os(Buf); + + Os << "Member variable "; + printQuotedName(Os, Member); + Os << " in "; + printQuotedQualifiedName(Os, ClassCXXRD); + Os << " is a " + << (isa(MemberType) ? "raw pointer" : "reference") + << " to ref-countable type "; + printQuotedQualifiedName(Os, MemberCXXRD); + Os << "; member variables must be ref-counted."; + + PathDiagnosticLocation BSLoc(Member->getSourceRange().getBegin(), + BR->getSourceManager()); + auto Report = std::make_unique(Bug, Os.str(), BSLoc); + Report->addRange(Member->getSourceRange()); + BR->emitReport(std::move(Report)); + } +}; +} // namespace + +void ento::registerWebKitNoUncountedMemberChecker(CheckerManager &Mgr) { + Mgr.registerChecker(); +} + +bool ento::shouldRegisterWebKitNoUncountedMemberChecker( + const CheckerManager &Mgr) { + return true; +} diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp index 2a55c996471245..57fde32bc01d06 100644 --- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -1628,10 +1628,6 @@ RegionStoreManager::findLazyBinding(RegionBindingsConstRef B, SVal RegionStoreManager::getBindingForElement(RegionBindingsConstRef B, const ElementRegion* R) { - // We do not currently model bindings of the CompoundLiteralregion. - if (isa(R->getBaseRegion())) - return UnknownVal(); - // Check if the region has a binding. 
if (const Optional &V = B.getDirectBinding(R)) return *V; diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-members.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-members.cpp new file mode 100644 index 00000000000000..e88c0b3b0dd036 --- /dev/null +++ b/clang/test/Analysis/Checkers/WebKit/uncounted-members.cpp @@ -0,0 +1,43 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=webkit.WebKitNoUncountedMemberChecker -verify %s + +#include "mock-types.h" + +namespace members { + struct Foo { + private: + RefCountable* a = nullptr; +// expected-warning@-1{{Member variable 'a' in 'members::Foo' is a raw pointer to ref-countable type 'RefCountable'}} + + protected: + RefPtr b; + + public: + RefCountable silenceWarningAboutInit; + RefCountable& c = silenceWarningAboutInit; +// expected-warning@-1{{Member variable 'c' in 'members::Foo' is a reference to ref-countable type 'RefCountable'}} + Ref d; + }; + + template + struct FooTmpl { + T* a; +// expected-warning@-1{{Member variable 'a' in 'members::FooTmpl' is a raw pointer to ref-countable type 'RefCountable'}} + }; + + void forceTmplToInstantiate(FooTmpl) {} +} + +namespace ignore_unions { + union Foo { + RefCountable* a; + RefPtr b; + Ref c; + }; + + template + union RefPtr { + T* a; + }; + + void forceTmplToInstantiate(RefPtr) {} +} diff --git a/clang/test/Analysis/compound-literals.c b/clang/test/Analysis/compound-literals.c index f8b9121494c122..42e6a55a30c7c5 100644 --- a/clang/test/Analysis/compound-literals.c +++ b/clang/test/Analysis/compound-literals.c @@ -1,4 +1,7 @@ -// RUN: %clang_cc1 -triple=i386-apple-darwin10 -analyze -analyzer-checker=debug.ExprInspection -verify %s +// RUN: %clang_cc1 -triple=i386-apple-darwin10 -verify %s -analyze \ +// RUN: -analyzer-checker=debug.ExprInspection + +#define NULL 0 void clang_analyzer_eval(int); // pr28449: Used to crash. 
@@ -6,3 +9,15 @@ void foo(void) { static const unsigned short array[] = (const unsigned short[]){0x0F00}; clang_analyzer_eval(array[0] == 0x0F00); // expected-warning{{TRUE}} } + +// check that we propagate info through compound literal regions +void bar() { + int *integers = (int[]){1, 2, 3}; + clang_analyzer_eval(integers[0] == 1); // expected-warning{{TRUE}} + clang_analyzer_eval(integers[1] == 2); // expected-warning{{TRUE}} + clang_analyzer_eval(integers[2] == 3); // expected-warning{{TRUE}} + + int **pointers = (int *[]){&integers[0], NULL}; + clang_analyzer_eval(pointers[0] == NULL); // expected-warning{{FALSE}} + clang_analyzer_eval(pointers[1] == NULL); // expected-warning{{TRUE}} +} diff --git a/clang/test/Analysis/retain-release-compound-literal.m b/clang/test/Analysis/retain-release-compound-literal.m new file mode 100644 index 00000000000000..29a125346363dc --- /dev/null +++ b/clang/test/Analysis/retain-release-compound-literal.m @@ -0,0 +1,25 @@ +// RUN: %clang_analyze_cc1 -verify -Wno-objc-root-class %s \ +// RUN: -analyzer-checker=core,osx.cocoa.RetainCount + +#define NULL 0 +#define CF_RETURNS_RETAINED __attribute__((cf_returns_retained)) +#define CF_CONSUMED __attribute__((cf_consumed)) + +void clang_analyzer_eval(int); + +typedef const void *CFTypeRef; + +extern CFTypeRef CFCreate() CF_RETURNS_RETAINED; +extern CFTypeRef CFRetain(CFTypeRef cf); +extern void CFRelease(CFTypeRef cf); + +void bar(CFTypeRef *v) {} + +void test1() { + CFTypeRef *values = (CFTypeRef[]){ + CFCreate(), // no-warning + CFCreate(), // expected-warning{{leak}} + CFCreate()}; // no-warning + CFRelease(values[0]); + CFRelease(values[2]); +} diff --git a/clang/test/Analysis/stream-error.c b/clang/test/Analysis/stream-error.c index cc0147deafdf2c..e91ab2c6c28ccf 100644 --- a/clang/test/Analysis/stream-error.c +++ b/clang/test/Analysis/stream-error.c @@ -76,7 +76,7 @@ void error_fread() { } if (ferror(F)) { clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} - 
fread(Buf, 1, 10, F); // no warning + fread(Buf, 1, 10, F); // expected-warning {{might be 'indeterminate'}} } } fclose(F); @@ -94,7 +94,7 @@ void error_fwrite() { } else { clang_analyzer_eval(feof(F)); // expected-warning {{FALSE}} clang_analyzer_eval(ferror(F)); // expected-warning {{TRUE}} - fwrite(0, 1, 10, F); // no warning + fwrite(0, 1, 10, F); // expected-warning {{might be 'indeterminate'}} } fclose(F); Ret = fwrite(0, 1, 10, F); // expected-warning {{Stream might be already closed}} @@ -166,3 +166,70 @@ void error_fseek() { } fclose(F); } + +void error_indeterminate() { + FILE *F = fopen("file", "r+"); + if (!F) + return; + const char *Buf = "123456789"; + int rc = fseek(F, 0, SEEK_SET); + if (rc) { + if (feof(F)) { + fwrite(Buf, 1, 10, F); // no warning + } else if (ferror(F)) { + fwrite(Buf, 1, 10, F); // expected-warning {{might be 'indeterminate'}} + } else { + fwrite(Buf, 1, 10, F); // expected-warning {{might be 'indeterminate'}} + } + } + fclose(F); +} + +void error_indeterminate_clearerr() { + FILE *F = fopen("file", "r+"); + if (!F) + return; + const char *Buf = "123456789"; + int rc = fseek(F, 0, SEEK_SET); + if (rc) { + if (feof(F)) { + clearerr(F); + fwrite(Buf, 1, 10, F); // no warning + } else if (ferror(F)) { + clearerr(F); + fwrite(Buf, 1, 10, F); // expected-warning {{might be 'indeterminate'}} + } else { + clearerr(F); + fwrite(Buf, 1, 10, F); // expected-warning {{might be 'indeterminate'}} + } + } + fclose(F); +} + +void error_indeterminate_feof1() { + FILE *F = fopen("file", "r+"); + if (!F) + return; + char Buf[10]; + if (fread(Buf, 1, 10, F) < 10) { + if (feof(F)) { + // error is feof, should be non-indeterminate + fwrite("1", 1, 1, F); // no warning + } + } + fclose(F); +} + +void error_indeterminate_feof2() { + FILE *F = fopen("file", "r+"); + if (!F) + return; + char Buf[10]; + if (fread(Buf, 1, 10, F) < 10) { + if (ferror(F) == 0) { + // error is feof, should be non-indeterminate + fwrite("1", 1, 1, F); // no warning + } + } + 
fclose(F); +} diff --git a/clang/test/CodeGen/ppc64-complex-parms.c b/clang/test/CodeGen/ppc64-complex-parms.c index c0e1794bf47c64..1c8aa1d568cf79 100644 --- a/clang/test/CodeGen/ppc64-complex-parms.c +++ b/clang/test/CodeGen/ppc64-complex-parms.c @@ -1,8 +1,19 @@ +// REQUIRES: powerpc-registered-target // RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -target-feature +float128 -DTEST_F128 -triple \ +// RUN: powerpc64le-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s \ +// RUN: --check-prefix CHECK-F128 float crealf(_Complex float); double creal(_Complex double); long double creall(_Complex long double); +#ifdef TEST_F128 +__float128 crealf128(_Complex __float128); +__float128 foo_f128(_Complex __float128 x) { + return crealf128(x); +} +// CHECK-F128: define fp128 @foo_f128(fp128 {{[%A-Za-z0-9.]+}}, fp128 {{[%A-Za-z0-9.]+}}) +#endif float foo_float(_Complex float x) { return crealf(x); diff --git a/clang/test/CodeGen/ppc64-complex-return.c b/clang/test/CodeGen/ppc64-complex-return.c index 02bfe82d4efec4..a27286d85b8fd3 100644 --- a/clang/test/CodeGen/ppc64-complex-return.c +++ b/clang/test/CodeGen/ppc64-complex-return.c @@ -1,9 +1,20 @@ // REQUIRES: powerpc-registered-target // RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -target-feature +float128 -DTEST_F128 -triple \ +// RUN: powerpc64le-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s \ +// RUN: --check-prefix CHECK-F128 float crealf(_Complex float); double creal(_Complex double); long double creall(_Complex long double); +#ifdef TEST_F128 +__float128 crealf128(_Complex __float128); +_Complex __float128 foo_f128(_Complex __float128 x) { + return x; +} + +// CHECK-F128: define { fp128, fp128 } @foo_f128(fp128 {{[%A-Za-z0-9.]+}}, fp128 {{[%A-Za-z0-9.]+}}) [[NUW:#[0-9]+]] { +#endif _Complex float foo_float(_Complex float x) { return x; @@ -80,6 +91,17 @@ long double bar_long_double(void) { 
// CHECK: extractvalue { ppc_fp128, ppc_fp128 } [[VAR3]], 0 // CHECK: extractvalue { ppc_fp128, ppc_fp128 } [[VAR3]], 1 +#ifdef TEST_F128 +__float128 bar_f128(void) { + return crealf128(foo_f128(2.0Q - 2.5Qi)); +} + +// CHECK-F128: define fp128 @bar_f128() [[NUW]] { +// CHECK-F128: [[VAR3:[%A-Za-z0-9.]+]] = call { fp128, fp128 } @foo_f128 +// CHECK-F128: extractvalue { fp128, fp128 } [[VAR3]], 0 +// CHECK-F128: extractvalue { fp128, fp128 } [[VAR3]], 1 +#endif + int bar_int(void) { return __real__(foo_int(2 - 3i)); } diff --git a/clang/test/CodeGen/sanitize-coverage.c b/clang/test/CodeGen/sanitize-coverage.c index 6fc8e39354d4f8..ea4ac9296b48fa 100644 --- a/clang/test/CodeGen/sanitize-coverage.c +++ b/clang/test/CodeGen/sanitize-coverage.c @@ -4,6 +4,9 @@ // RUN: %clang %s -target x86_64-unknown-linux-gnu -emit-llvm -S -fsanitize=memory -fsanitize-coverage=trace-pc,trace-cmp -o - | FileCheck %s --check-prefixes=CHECK,MSAN // RUN: %clang %s -target x86_64-unknown-linux-gnu -emit-llvm -S -fsanitize=thread -fsanitize-coverage=trace-pc,trace-cmp -o - | FileCheck %s --check-prefixes=CHECK,TSAN // RUN: %clang %s -target x86_64-unknown-linux-gnu -emit-llvm -S -fsanitize=undefined -fsanitize-coverage=trace-pc,trace-cmp -o - | FileCheck %s --check-prefixes=CHECK,UBSAN +// +// Host armv7 is currently unsupported: https://bugs.llvm.org/show_bug.cgi?id=46117 +// XFAIL: armv7, thumbv7 int x[10]; diff --git a/clang/test/CodeGenCXX/debug-info-template-explicit-specialization.cpp b/clang/test/CodeGenCXX/debug-info-template-explicit-specialization.cpp index d97d82b7696575..4e41c4092bf4e3 100644 --- a/clang/test/CodeGenCXX/debug-info-template-explicit-specialization.cpp +++ b/clang/test/CodeGenCXX/debug-info-template-explicit-specialization.cpp @@ -110,7 +110,7 @@ struct j_wrap { }; j_wrap> j_wrap_j; // CHECK: DICompositeType(tag: DW_TAG_structure_type, name: "j" -// CHECK: DICompositeType(tag: DW_TAG_structure_type, name: "j_wrap>" +// CHECK: DICompositeType(tag: 
DW_TAG_structure_type, name: "j_wrap >" template struct k { diff --git a/clang/test/Driver/x86-malign-branch.c b/clang/test/Driver/x86-malign-branch.c index 5180eb0a16196c..a71b18105baa30 100644 --- a/clang/test/Driver/x86-malign-branch.c +++ b/clang/test/Driver/x86-malign-branch.c @@ -1,8 +1,10 @@ -/// Test that -malign-branch* and -mbranches-within-32B-boundaries are parsed and converted to -mllvm options. +/// Test that -malign-branch* and -mbranches-within-32B-boundaries are parsed and converted to MC options. /// Test -malign-branch-boundary= // RUN: %clang -target x86_64 -malign-branch-boundary=16 %s -c -### 2>&1 | FileCheck %s --check-prefix=BOUNDARY // BOUNDARY: "-mllvm" "-x86-align-branch-boundary=16" +// RUN: %clang -target x86_64-unknown-linux -malign-branch-boundary=16 -flto %s -### 2>&1 | FileCheck %s --check-prefix=BOUNDARY-LTO +// BOUNDARY-LTO: "-plugin-opt=-x86-align-branch-boundary=16" // RUN: %clang -target x86_64 -malign-branch-boundary=8 %s -c -### 2>&1 | FileCheck %s --check-prefix=BOUNDARY-ERR // RUN: %clang -target x86_64 -malign-branch-boundary=15 %s -c -### 2>&1 | FileCheck %s --check-prefix=BOUNDARY-ERR @@ -13,6 +15,8 @@ // TYPE0: "-mllvm" "-x86-align-branch=fused+jcc+jmp" // RUN: %clang -target x86_64 -malign-branch=fused,jcc,jmp,ret,call,indirect %s -c -### %s 2>&1 | FileCheck %s --check-prefix=TYPE1 // TYPE1: "-mllvm" "-x86-align-branch=fused+jcc+jmp+ret+call+indirect" +// RUN: %clang -target x86_64-unknown-linux -malign-branch=fused,jcc,jmp -flto %s -### %s 2>&1 | FileCheck %s --check-prefix=TYPE0-LTO +// TYPE0-LTO: "-plugin-opt=-x86-align-branch=fused+jcc+jmp" // RUN: %clang -target x86_64 -malign-branch=fused,foo,bar %s -c -### %s 2>&1 | FileCheck %s --check-prefix=TYPE-ERR // TYPE-ERR: invalid argument 'foo' to -malign-branch=; each element must be one of: fused, jcc, jmp, call, ret, indirect @@ -23,10 +27,14 @@ // PREFIX-0: "-mllvm" "-x86-pad-max-prefix-size=0" // RUN: %clang -target x86_64 -mpad-max-prefix-size=15 %s -c -### 2>&1 
| FileCheck %s --check-prefix=PREFIX-15 // PREFIX-15: "-mllvm" "-x86-pad-max-prefix-size=15" +// RUN: %clang -target x86_64-unknown-linux -mpad-max-prefix-size=0 -flto %s -### 2>&1 | FileCheck %s --check-prefix=PREFIX-0-LTO +// PREFIX-0-LTO: "-plugin-opt=-x86-pad-max-prefix-size=0" /// Test -mbranches-within-32B-boundaries // RUN: %clang -target x86_64 -mbranches-within-32B-boundaries %s -c -### 2>&1 | FileCheck %s --check-prefix=32B // 32B: "-mllvm" "-x86-branches-within-32B-boundaries" +// RUN: %clang -target x86_64-unknown-linux -mbranches-within-32B-boundaries -flto %s -### 2>&1 | FileCheck %s --check-prefix=32B-LTO +// 32B-LTO: "-plugin-opt=-x86-branches-within-32B-boundaries" /// Unsupported on other targets. // RUN: %clang -target aarch64 -malign-branch=jmp %s -c -### 2>&1 | FileCheck --check-prefix=UNUSED %s diff --git a/clang/test/Frontend/fixed_point_errors.c b/clang/test/Frontend/fixed_point_errors.c index db15bd874b3169..9b600fbc2642b1 100644 --- a/clang/test/Frontend/fixed_point_errors.c +++ b/clang/test/Frontend/fixed_point_errors.c @@ -137,15 +137,15 @@ _Sat longfract_t td_sat_long_fract; // expected-error{{'_Sat' specifier _Sat longaccum_t td_sat_long_accum; // expected-error{{'_Sat' specifier is only valid on '_Fract' or '_Accum', not 'type-name'}} /* Bad suffixes */ -_Accum fk = 1.0fk; // expected-error{{invalid suffix 'fk' on integer constant}} -_Accum kk = 1.0kk; // expected-error{{invalid suffix 'kk' on integer constant}} -_Accum rk = 1.0rk; // expected-error{{invalid suffix 'rk' on integer constant}} -_Accum rk = 1.0rr; // expected-error{{invalid suffix 'rr' on integer constant}} -_Accum qk = 1.0qr; // expected-error{{invalid suffix 'qr' on integer constant}} +_Accum fk = 1.0fk; // expected-error{{invalid suffix 'fk' on fixed-point constant}} +_Accum kk = 1.0kk; // expected-error{{invalid suffix 'kk' on fixed-point constant}} +_Accum rk = 1.0rk; // expected-error{{invalid suffix 'rk' on fixed-point constant}} +_Accum rk = 1.0rr; // 
expected-error{{invalid suffix 'rr' on fixed-point constant}} +_Accum qk = 1.0qr; // expected-error{{invalid suffix 'qr' on fixed-point constant}} /* Using wrong exponent notation */ -_Accum dec_with_hex_exp1 = 0.1p10k; // expected-error{{invalid suffix 'p10k' on integer constant}} -_Accum dec_with_hex_exp2 = 0.1P10k; // expected-error{{invalid suffix 'P10k' on integer constant}} +_Accum dec_with_hex_exp1 = 0.1p10k; // expected-error{{invalid suffix 'p10k' on fixed-point constant}} +_Accum dec_with_hex_exp2 = 0.1P10k; // expected-error{{invalid suffix 'P10k' on fixed-point constant}} _Accum hex_with_dex_exp1 = 0x0.1e10k; // expected-error{{hexadecimal floating constant requires an exponent}} _Accum hex_with_dex_exp2 = 0x0.1E10k; // expected-error{{hexadecimal floating constant requires an exponent}} diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c index 9f036c94c3f8e6..5c571fb458ec5b 100644 --- a/clang/test/Misc/target-invalid-cpu-note.c +++ b/clang/test/Misc/target-invalid-cpu-note.c @@ -81,7 +81,7 @@ // PPC-SAME: 603e, 603ev, 604, 604e, 620, 630, g3, 7400, g4, 7450, g4+, 750, // PPC-SAME: 8548, 970, g5, a2, a2q, e500, e500mc, e5500, power3, pwr3, power4, // PPC-SAME: pwr4, power5, pwr5, power5x, pwr5x, power6, pwr6, power6x, pwr6x, -// PPC-SAME: power7, pwr7, power8, pwr8, power9, pwr9, powerpc, ppc, powerpc64, +// PPC-SAME: power7, pwr7, power8, pwr8, power9, pwr9, power10, pwr10, powerpc, ppc, powerpc64, // PPC-SAME: ppc64, powerpc64le, ppc64le, future // RUN: not %clang_cc1 -triple mips--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix MIPS diff --git a/clang/test/Modules/ExtDebugInfo.cpp b/clang/test/Modules/ExtDebugInfo.cpp index 6781810d592cce..aff2953b4bb511 100644 --- a/clang/test/Modules/ExtDebugInfo.cpp +++ b/clang/test/Modules/ExtDebugInfo.cpp @@ -85,14 +85,14 @@ void foo() { // This type is not anchored in the module by an explicit template instantiation. 
// CHECK: !DICompositeType(tag: DW_TAG_class_type, -// CHECK-SAME: name: "Template>", +// CHECK-SAME: name: "Template >", // CHECK-SAME: scope: ![[NS]], // CHECK-SAME: elements: // CHECK-SAME: identifier: "_ZTSN8DebugCXX8TemplateIlNS_6traitsIlEEEE") // This type is anchored in the module by an explicit template instantiation. // CHECK: !DICompositeType(tag: DW_TAG_class_type, -// CHECK-SAME: name: "Template>", +// CHECK-SAME: name: "Template >", // CHECK-SAME: scope: ![[NS]], // CHECK-SAME: flags: DIFlagFwdDecl // CHECK-SAME: identifier: "_ZTSN8DebugCXX8TemplateIiNS_6traitsIiEEEE") @@ -103,7 +103,7 @@ void foo() { // This one isn't. // CHECK: !DICompositeType(tag: DW_TAG_class_type, -// CHECK-SAME: name: "Template>", +// CHECK-SAME: name: "Template >", // CHECK-SAME: scope: ![[NS]], // CHECK-SAME: elements: // CHECK-SAME: templateParams: diff --git a/clang/test/Modules/ModuleDebugInfo.cpp b/clang/test/Modules/ModuleDebugInfo.cpp index 26369c89605812..e6e99ed4e53794 100644 --- a/clang/test/Modules/ModuleDebugInfo.cpp +++ b/clang/test/Modules/ModuleDebugInfo.cpp @@ -65,7 +65,7 @@ // This type is anchored by an explicit template instantiation. // CHECK: !DICompositeType(tag: DW_TAG_class_type, -// CHECK-SAME: name: "Template>" +// CHECK-SAME: name: "Template >" // CHECK-SAME: elements: // CHECK-SAME: templateParams: // CHECK-SAME: identifier: "_ZTSN8DebugCXX8TemplateIiNS_6traitsIiEEEE") @@ -80,7 +80,7 @@ // CHECK-SAME: identifier: "_ZTSN8DebugCXX6traitsIfEE") // CHECK: !DICompositeType(tag: DW_TAG_class_type, -// CHECK-SAME: name: "Template>" +// CHECK-SAME: name: "Template >" // CHECK-SAME: elements: // CHECK-SAME: templateParams: // CHECK-SAME: identifier: "_ZTSN8DebugCXX8TemplateIlNS_6traitsIlEEEE") @@ -89,7 +89,7 @@ // no mangled name here yet. 
// CHECK: !DICompositeType(tag: DW_TAG_class_type, -// CHECK-SAME: name: "Template>" +// CHECK-SAME: name: "Template >" // CHECK-SAME: flags: DIFlagFwdDecl // CHECK-SAME: identifier: "_ZTSN8DebugCXX8TemplateIfNS_6traitsIfEEEE") diff --git a/clang/test/Preprocessor/init-ppc64.c b/clang/test/Preprocessor/init-ppc64.c index b24f8eb7050be3..ed8601636554ec 100644 --- a/clang/test/Preprocessor/init-ppc64.c +++ b/clang/test/Preprocessor/init-ppc64.c @@ -627,12 +627,30 @@ // PPCPOWER9:#define _ARCH_PWR7 1 // PPCPOWER9:#define _ARCH_PWR9 1 // +// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-cpu pwr10 -fno-signed-char < /dev/null | FileCheck -match-full-lines -check-prefix PPCPOWER10 %s +// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-cpu power10 -fno-signed-char < /dev/null | FileCheck -match-full-lines -check-prefix PPCPOWER10 %s +// +// PPCPOWER10:#define _ARCH_PPC 1 +// PPCPOWER10:#define _ARCH_PPC64 1 +// PPCPOWER10:#define _ARCH_PPCGR 1 +// PPCPOWER10:#define _ARCH_PPCSQ 1 +// PPCPOWER10:#define _ARCH_PWR10 1 +// PPCPOWER10:#define _ARCH_PWR4 1 +// PPCPOWER10:#define _ARCH_PWR5 1 +// PPCPOWER10:#define _ARCH_PWR5X 1 +// PPCPOWER10:#define _ARCH_PWR6 1 +// PPCPOWER10-NOT:#define _ARCH_PWR6X 1 +// PPCPOWER10:#define _ARCH_PWR7 1 +// PPCPOWER10:#define _ARCH_PWR8 1 +// PPCPOWER10:#define _ARCH_PWR9 1 +// // RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-cpu future -fno-signed-char < /dev/null | FileCheck -match-full-lines -check-prefix PPCFUTURE %s // // PPCFUTURE:#define _ARCH_PPC 1 // PPCFUTURE:#define _ARCH_PPC64 1 // PPCFUTURE:#define _ARCH_PPCGR 1 // PPCFUTURE:#define _ARCH_PPCSQ 1 +// PPCFUTURE:#define _ARCH_PWR10 1 // PPCFUTURE:#define _ARCH_PWR4 1 // PPCFUTURE:#define _ARCH_PWR5 1 // PPCFUTURE:#define _ARCH_PWR5X 1 diff --git a/clang/test/SemaCXX/warn-unused-value.cpp b/clang/test/SemaCXX/warn-unused-value.cpp index 98e2a4e86304d7..02bceeca133747 100644 --- 
a/clang/test/SemaCXX/warn-unused-value.cpp +++ b/clang/test/SemaCXX/warn-unused-value.cpp @@ -108,3 +108,33 @@ void f() { (void)sizeof(*x); // Ok } } + +static volatile char var1 = 'a'; +volatile char var2 = 'a'; +static volatile char arr1[] = "hello"; +volatile char arr2[] = "hello"; +void volatile_array() { + static volatile char var3 = 'a'; + volatile char var4 = 'a'; + static volatile char arr3[] = "hello"; + volatile char arr4[] = "hello"; + + // These all result in volatile loads in C and C++11. In C++98, they don't, + // but we suppress the warning in the case where '(void)var;' might be + // idiomatically suppressing an 'unused variable' warning. + (void)var1; + (void)var2; +#if __cplusplus < 201103L + // expected-warning@-2 {{expression result unused; assign into a variable to force a volatile load}} +#endif + (void)var3; + (void)var4; + + // None of these result in volatile loads in any language mode, and it's not + // really reasonable to assume that they would, since volatile array loads + // don't really exist anywhere. 
+ (void)arr1; + (void)arr2; + (void)arr3; + (void)arr4; +} diff --git a/clang/unittests/StaticAnalyzer/StoreTest.cpp b/clang/unittests/StaticAnalyzer/StoreTest.cpp index c8b930bf3247ca..17b64ce622f897 100644 --- a/clang/unittests/StaticAnalyzer/StoreTest.cpp +++ b/clang/unittests/StaticAnalyzer/StoreTest.cpp @@ -15,89 +15,139 @@ namespace clang { namespace ento { namespace { +class StoreTestConsumer : public ExprEngineConsumer { +public: + StoreTestConsumer(CompilerInstance &C) : ExprEngineConsumer(C) {} + + bool HandleTopLevelDecl(DeclGroupRef DG) override { + for (const auto *D : DG) + performTest(D); + return true; + } + +private: + virtual void performTest(const Decl *D) = 0; +}; + +template class TestAction : public ASTFrontendAction { +public: + std::unique_ptr CreateASTConsumer(CompilerInstance &Compiler, + StringRef File) override { + return std::make_unique(Compiler); + } +}; + // Test that we can put a value into an int-type variable and load it // back from that variable. Test what happens if default bindings are used. 
-class VariableBindConsumer : public ExprEngineConsumer { - void performTest(const Decl *D) { - StoreManager &StMgr = Eng.getStoreManager(); - SValBuilder &SVB = Eng.getSValBuilder(); - MemRegionManager &MRMgr = StMgr.getRegionManager(); - const ASTContext &ACtx = Eng.getContext(); +class VariableBindConsumer : public StoreTestConsumer { + void performTest(const Decl *D) override { + StoreManager &SManager = Eng.getStoreManager(); + SValBuilder &Builder = Eng.getSValBuilder(); + MemRegionManager &MRManager = SManager.getRegionManager(); + const ASTContext &ASTCtxt = Eng.getContext(); const auto *VDX0 = findDeclByName(D, "x0"); const auto *VDY0 = findDeclByName(D, "y0"); const auto *VDZ0 = findDeclByName(D, "z0"); const auto *VDX1 = findDeclByName(D, "x1"); const auto *VDY1 = findDeclByName(D, "y1"); - assert(VDX0 && VDY0 && VDZ0 && VDX1 && VDY1); + + ASSERT_TRUE(VDX0 && VDY0 && VDZ0 && VDX1 && VDY1); const StackFrameContext *SFC = Eng.getAnalysisDeclContextManager().getStackFrame(D); - Loc LX0 = loc::MemRegionVal(MRMgr.getVarRegion(VDX0, SFC)); - Loc LY0 = loc::MemRegionVal(MRMgr.getVarRegion(VDY0, SFC)); - Loc LZ0 = loc::MemRegionVal(MRMgr.getVarRegion(VDZ0, SFC)); - Loc LX1 = loc::MemRegionVal(MRMgr.getVarRegion(VDX1, SFC)); - Loc LY1 = loc::MemRegionVal(MRMgr.getVarRegion(VDY1, SFC)); + Loc LX0 = loc::MemRegionVal(MRManager.getVarRegion(VDX0, SFC)); + Loc LY0 = loc::MemRegionVal(MRManager.getVarRegion(VDY0, SFC)); + Loc LZ0 = loc::MemRegionVal(MRManager.getVarRegion(VDZ0, SFC)); + Loc LX1 = loc::MemRegionVal(MRManager.getVarRegion(VDX1, SFC)); + Loc LY1 = loc::MemRegionVal(MRManager.getVarRegion(VDY1, SFC)); - Store StInit = StMgr.getInitialStore(SFC).getStore(); - SVal Zero = SVB.makeZeroVal(ACtx.IntTy); - SVal One = SVB.makeIntVal(1, ACtx.IntTy); - SVal NarrowZero = SVB.makeZeroVal(ACtx.CharTy); + Store StInit = SManager.getInitialStore(SFC).getStore(); + SVal Zero = Builder.makeZeroVal(ASTCtxt.IntTy); + SVal One = Builder.makeIntVal(1, ASTCtxt.IntTy); + SVal 
NarrowZero = Builder.makeZeroVal(ASTCtxt.CharTy); // Bind(Zero) - Store StX0 = - StMgr.Bind(StInit, LX0, Zero).getStore(); - ASSERT_EQ(Zero, StMgr.getBinding(StX0, LX0, ACtx.IntTy)); + Store StX0 = SManager.Bind(StInit, LX0, Zero).getStore(); + EXPECT_EQ(Zero, SManager.getBinding(StX0, LX0, ASTCtxt.IntTy)); // BindDefaultInitial(Zero) Store StY0 = - StMgr.BindDefaultInitial(StInit, LY0.getAsRegion(), Zero).getStore(); - ASSERT_EQ(Zero, StMgr.getBinding(StY0, LY0, ACtx.IntTy)); - ASSERT_EQ(Zero, *StMgr.getDefaultBinding(StY0, LY0.getAsRegion())); + SManager.BindDefaultInitial(StInit, LY0.getAsRegion(), Zero).getStore(); + EXPECT_EQ(Zero, SManager.getBinding(StY0, LY0, ASTCtxt.IntTy)); + EXPECT_EQ(Zero, *SManager.getDefaultBinding(StY0, LY0.getAsRegion())); // BindDefaultZero() - Store StZ0 = - StMgr.BindDefaultZero(StInit, LZ0.getAsRegion()).getStore(); + Store StZ0 = SManager.BindDefaultZero(StInit, LZ0.getAsRegion()).getStore(); // BindDefaultZero wipes the region with '0 S8b', not with out Zero. // Direct load, however, does give us back the object of the type // that we specify for loading. 
- ASSERT_EQ(Zero, StMgr.getBinding(StZ0, LZ0, ACtx.IntTy)); - ASSERT_EQ(NarrowZero, *StMgr.getDefaultBinding(StZ0, LZ0.getAsRegion())); + EXPECT_EQ(Zero, SManager.getBinding(StZ0, LZ0, ASTCtxt.IntTy)); + EXPECT_EQ(NarrowZero, *SManager.getDefaultBinding(StZ0, LZ0.getAsRegion())); // Bind(One) - Store StX1 = - StMgr.Bind(StInit, LX1, One).getStore(); - ASSERT_EQ(One, StMgr.getBinding(StX1, LX1, ACtx.IntTy)); + Store StX1 = SManager.Bind(StInit, LX1, One).getStore(); + EXPECT_EQ(One, SManager.getBinding(StX1, LX1, ASTCtxt.IntTy)); // BindDefaultInitial(One) Store StY1 = - StMgr.BindDefaultInitial(StInit, LY1.getAsRegion(), One).getStore(); - ASSERT_EQ(One, StMgr.getBinding(StY1, LY1, ACtx.IntTy)); - ASSERT_EQ(One, *StMgr.getDefaultBinding(StY1, LY1.getAsRegion())); + SManager.BindDefaultInitial(StInit, LY1.getAsRegion(), One).getStore(); + EXPECT_EQ(One, SManager.getBinding(StY1, LY1, ASTCtxt.IntTy)); + EXPECT_EQ(One, *SManager.getDefaultBinding(StY1, LY1.getAsRegion())); } public: - VariableBindConsumer(CompilerInstance &C) : ExprEngineConsumer(C) {} + using StoreTestConsumer::StoreTestConsumer; +}; - bool HandleTopLevelDecl(DeclGroupRef DG) override { - for (const auto *D : DG) - performTest(D); - return true; +TEST(Store, VariableBind) { + EXPECT_TRUE(tooling::runToolOnCode( + std::make_unique>(), + "void foo() { int x0, y0, z0, x1, y1; }")); +} + +class LiteralCompoundConsumer : public StoreTestConsumer { + void performTest(const Decl *D) override { + StoreManager &SManager = Eng.getStoreManager(); + SValBuilder &Builder = Eng.getSValBuilder(); + MemRegionManager &MRManager = SManager.getRegionManager(); + ASTContext &ASTCtxt = Eng.getContext(); + + using namespace ast_matchers; + + const auto *CL = findNode(D, compoundLiteralExpr()); + + const StackFrameContext *SFC = + Eng.getAnalysisDeclContextManager().getStackFrame(D); + + QualType Int = ASTCtxt.IntTy; + + // Get region for 'test' + const SubRegion *CLRegion = MRManager.getCompoundLiteralRegion(CL, SFC); + + 
// Get value for 'test[0]' + NonLoc Zero = Builder.makeIntVal(0, false); + loc::MemRegionVal ZeroElement( + MRManager.getElementRegion(ASTCtxt.IntTy, Zero, CLRegion, ASTCtxt)); + + Store StInit = SManager.getInitialStore(SFC).getStore(); + // Let's bind constant 1 to 'test[0]' + SVal One = Builder.makeIntVal(1, Int); + Store StX = SManager.Bind(StInit, ZeroElement, One).getStore(); + + // And make sure that we can read this binding back as it was + EXPECT_EQ(One, SManager.getBinding(StX, ZeroElement, Int)); } -}; -class VariableBindAction : public ASTFrontendAction { public: - std::unique_ptr CreateASTConsumer(CompilerInstance &Compiler, - StringRef File) override { - return std::make_unique(Compiler); - } + using StoreTestConsumer::StoreTestConsumer; }; -TEST(Store, VariableBind) { - EXPECT_TRUE(tooling::runToolOnCode(std::make_unique(), - "void foo() { int x0, y0, z0, x1, y1; }")); +TEST(Store, LiteralCompound) { + EXPECT_TRUE(tooling::runToolOnCode( + std::make_unique>(), + "void foo() { int *test = (int[]){ 1, 2, 3 }; }", "input.c")); } } // namespace diff --git a/compiler-rt/lib/asan/tests/asan_test.cpp b/compiler-rt/lib/asan/tests/asan_test.cpp index edc98ed1852026..83b0b0e8d33e93 100644 --- a/compiler-rt/lib/asan/tests/asan_test.cpp +++ b/compiler-rt/lib/asan/tests/asan_test.cpp @@ -588,6 +588,9 @@ NOINLINE void TouchStackFunc() { A[i] = i*i; } +// Disabled due to rdar://problem/62141412 +#if !(defined(__APPLE__) && defined(__i386__)) + // Test that we handle longjmp and do not report false positives on stack. TEST(AddressSanitizer, LongJmpTest) { static jmp_buf buf; @@ -597,6 +600,7 @@ TEST(AddressSanitizer, LongJmpTest) { TouchStackFunc(); } } +#endif #if !defined(_WIN32) // Only basic longjmp is available on Windows. 
NOINLINE void UnderscopeLongJmpFunc1(jmp_buf buf) { @@ -658,6 +662,8 @@ TEST(AddressSanitizer, UnderscopeLongJmpTest) { } } +// Disabled due to rdar://problem/62141412 +#if !(defined(__APPLE__) && defined(__i386__)) TEST(AddressSanitizer, SigLongJmpTest) { static sigjmp_buf buf; if (!sigsetjmp(buf, 1)) { @@ -668,6 +674,8 @@ TEST(AddressSanitizer, SigLongJmpTest) { } #endif +#endif + // FIXME: Why does clang-cl define __EXCEPTIONS? #if defined(__EXCEPTIONS) && !defined(_WIN32) NOINLINE void ThrowFunc() { diff --git a/compiler-rt/test/tsan/java_finalizer2.cpp b/compiler-rt/test/tsan/java_finalizer2.cpp index f2590f7c40b9d3..87528900541a84 100644 --- a/compiler-rt/test/tsan/java_finalizer2.cpp +++ b/compiler-rt/test/tsan/java_finalizer2.cpp @@ -1,5 +1,9 @@ // RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s // Regression test for https://github.com/golang/go/issues/39186 + +// pthread barriers are not available on OS X +// UNSUPPORTED: darwin + #include "java.h" #include @@ -47,7 +51,7 @@ void *Ballast(void *p) { } int main() { - Heap* heap = (Heap*)calloc(sizeof(Heap), 1); + Heap* heap = (Heap*)calloc(sizeof(Heap), 2) + 1; __tsan_java_init((jptr)heap, sizeof(*heap)); __tsan_java_alloc((jptr)heap, sizeof(*heap)); // Ballast threads merely make the bug a bit easier to trigger. diff --git a/libcxx/test/support/test_macros.h b/libcxx/test/support/test_macros.h index 6cbb7f0ccc1660..79ee5ddace8c82 100644 --- a/libcxx/test/support/test_macros.h +++ b/libcxx/test/support/test_macros.h @@ -118,41 +118,41 @@ #endif #if TEST_STD_VER >= 11 -#define TEST_ALIGNOF(...) alignof(__VA_ARGS__) -#define TEST_ALIGNAS(...) alignas(__VA_ARGS__) -#define TEST_CONSTEXPR constexpr -#define TEST_NOEXCEPT noexcept -#define TEST_NOEXCEPT_FALSE noexcept(false) -#define TEST_NOEXCEPT_COND(...) noexcept(__VA_ARGS__) -# if TEST_STD_VER >= 14 -# define TEST_CONSTEXPR_CXX14 constexpr -# else -# define TEST_CONSTEXPR_CXX14 -# endif -# if TEST_STD_VER > 14 -# define TEST_THROW_SPEC(...) 
-# else -# define TEST_THROW_SPEC(...) throw(__VA_ARGS__) -# endif -# if TEST_STD_VER > 17 -# define TEST_CONSTEXPR_CXX20 constexpr -# else -# define TEST_CONSTEXPR_CXX20 -# endif +# define TEST_ALIGNOF(...) alignof(__VA_ARGS__) +# define TEST_ALIGNAS(...) alignas(__VA_ARGS__) +# define TEST_CONSTEXPR constexpr +# define TEST_NOEXCEPT noexcept +# define TEST_NOEXCEPT_FALSE noexcept(false) +# define TEST_NOEXCEPT_COND(...) noexcept(__VA_ARGS__) +#else +# if defined(TEST_COMPILER_CLANG) +# define TEST_ALIGNOF(...) _Alignof(__VA_ARGS__) +# else +# define TEST_ALIGNOF(...) __alignof(__VA_ARGS__) +# endif +# define TEST_ALIGNAS(...) __attribute__((__aligned__(__VA_ARGS__))) +# define TEST_CONSTEXPR +# define TEST_NOEXCEPT throw() +# define TEST_NOEXCEPT_FALSE +# define TEST_NOEXCEPT_COND(...) +#endif + +#if TEST_STD_VER >= 17 +# define TEST_THROW_SPEC(...) #else -#if defined(TEST_COMPILER_CLANG) -# define TEST_ALIGNOF(...) _Alignof(__VA_ARGS__) +# define TEST_THROW_SPEC(...) throw(__VA_ARGS__) +#endif + +#if TEST_STD_VER >= 14 +# define TEST_CONSTEXPR_CXX14 constexpr +#else +# define TEST_CONSTEXPR_CXX14 +#endif + +#if TEST_STD_VER >= 20 +# define TEST_CONSTEXPR_CXX20 constexpr #else -# define TEST_ALIGNOF(...) __alignof(__VA_ARGS__) -#endif -#define TEST_ALIGNAS(...) __attribute__((__aligned__(__VA_ARGS__))) -#define TEST_CONSTEXPR -#define TEST_CONSTEXPR_CXX14 -#define TEST_CONSTEXPR_CXX20 -#define TEST_NOEXCEPT throw() -#define TEST_NOEXCEPT_FALSE -#define TEST_NOEXCEPT_COND(...) -#define TEST_THROW_SPEC(...) throw(__VA_ARGS__) +# define TEST_CONSTEXPR_CXX20 #endif // Sniff out to see if the underlying C library has C11 features diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp index ccec90727730c2..2097234f1adb25 100644 --- a/lld/ELF/SymbolTable.cpp +++ b/lld/ELF/SymbolTable.cpp @@ -40,12 +40,14 @@ void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) { idx2 = idx1; idx1 = idx3; - // Now renaming is complete. No one refers Real symbol. 
We could leave - // Real as-is, but if Real is written to the symbol table, that may - // contain irrelevant values. So, we copy all values from Sym to Real. - StringRef s = real->getName(); - memcpy(real, sym, sizeof(SymbolUnion)); - real->setName(s); + // Now renaming is complete, and no one refers to real. We drop real from + // .symtab and .dynsym. If real is undefined, it is important that we don't + // leave it in .dynsym, because otherwise it might lead to an undefined symbol + // error in a subsequent link. If real is defined, we could emit real as an + // alias for sym, but that could degrade the user experience of some tools + // that can print out only one symbol for each location: sym is a preferred + // name than real, but they might print out real instead. + real->isUsedInRegularObj = false; } // Find an existing symbol or create a new one. diff --git a/lld/test/ELF/lto/wrap-2.ll b/lld/test/ELF/lto/wrap-2.ll index 4c8993aa0424c7..a441c9837bdaa5 100644 --- a/lld/test/ELF/lto/wrap-2.ll +++ b/lld/test/ELF/lto/wrap-2.ll @@ -27,10 +27,6 @@ ; BIND-NEXT: Value: ; BIND-NEXT: Size: ; BIND-NEXT: Binding: Local -; BIND: Name: __real_bar -; BIND-NEXT: Value: -; BIND-NEXT: Size: -; BIND-NEXT: Binding: Local ; BIND: Name: __wrap_bar ; BIND-NEXT: Value: ; BIND-NEXT: Size: diff --git a/lld/test/ELF/wrap-no-real.s b/lld/test/ELF/wrap-no-real.s index 41a23185093be4..43d94cf91f8a1b 100644 --- a/lld/test/ELF/wrap-no-real.s +++ b/lld/test/ELF/wrap-no-real.s @@ -2,30 +2,33 @@ // RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t1.o // RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/wrap-no-real.s -o %t2.o // RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/wrap-no-real2.s -o %t3.o -// RUN: ld.lld -o %t3.so -shared %t3.o +// RUN: ld.lld -o %t3.so -shared --soname=t3 %t3.o // RUN: ld.lld -o %t %t1.o %t2.o -wrap foo -// RUN: llvm-objdump -d --print-imm-hex %t | FileCheck %s - -// RUN: ld.lld -o %t %t1.o %t2.o %t3.so -wrap foo -// RUN: 
llvm-objdump -d --print-imm-hex %t | FileCheck %s +// RUN: llvm-objdump -d %t | FileCheck %s +// RUN: llvm-readelf -s -x .got %t | FileCheck --check-prefix=READELF --implicit-check-not=__real_ %s // CHECK: <_start>: -// CHECK-NEXT: movl $0x11010, %edx -// CHECK-NEXT: movl $0x11010, %edx -// CHECK-NEXT: movl $0x11000, %edx +// CHECK-NEXT: movq {{.*}}(%rip), %rax # 2021a8 +// CHECK-NEXT: movq {{.*}}(%rip), %rbx # 2021a8 +// CHECK-NEXT: movq {{.*}}(%rip), %rcx # 2021b0 -// RUN: llvm-objdump -t %t | FileCheck --check-prefix=SYM %s +// READELF: 0000000000011010 0 NOTYPE GLOBAL DEFAULT ABS __wrap_foo +// READELF: 0000000000011000 0 NOTYPE GLOBAL DEFAULT ABS foo +// READELF: Hex dump of section '.got': +// READELF-NEXT: 0x[[#%x,ADDR:]] 10100100 00000000 00100100 00000000 +// RUN: ld.lld -o %t2 %t1.o %t2.o %t3.so --wrap foo +// RUN: llvm-objdump -d %t2 | FileCheck --check-prefix=CHECK2 %s +// RUN: llvm-readelf -s -x .got %t2 | FileCheck --check-prefix=READELF --implicit-check-not=__real_ %s -// SYM: {{.*}} l .dynamic 0000000000000000 .hidden _DYNAMIC -// SYM-NEXT: 0000000000011000 g *ABS* 0000000000000000 __real_foo -// SYM-NEXT: 0000000000011010 g *ABS* 0000000000000000 __wrap_foo -// SYM-NEXT: {{.*}} g .text 0000000000000000 _start -// SYM-NEXT: 0000000000011000 g *ABS* 0000000000000000 foo +// CHECK2: <_start>: +// CHECK2-NEXT: movq {{.*}}(%rip), %rax # 2022b8 +// CHECK2-NEXT: movq {{.*}}(%rip), %rbx # 2022b8 +// CHECK2-NEXT: movq {{.*}}(%rip), %rcx # 2022c0 .global _start _start: - movl $foo, %edx - movl $__wrap_foo, %edx - movl $__real_foo, %edx + mov foo@gotpcrel(%rip), %rax + mov __wrap_foo@gotpcrel(%rip), %rbx + mov __real_foo@gotpcrel(%rip), %rcx diff --git a/lld/test/ELF/wrap.s b/lld/test/ELF/wrap.s index 5718ea45f669f9..2a3e56cb0af577 100644 --- a/lld/test/ELF/wrap.s +++ b/lld/test/ELF/wrap.s @@ -33,12 +33,7 @@ // SYM2-NEXT: Other [ // SYM2-NEXT: STV_PROTECTED // SYM2-NEXT: ] -// SYM3: Name: __real_foo -// SYM3-NEXT: Value: 0x11000 -// SYM3-NEXT: Size: -// 
SYM3-NEXT: Binding: Global -// SYM3-NEXT: Type: None -// SYM3-NEXT: Other: 0 +// SYM3-NOT: Name: __real_foo .global _start _start: diff --git a/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm b/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm index 79ccc5277d2e03..615f77b2dbcc35 100644 --- a/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm +++ b/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm @@ -367,8 +367,9 @@ static void ParseOSVersion(llvm::VersionTuple &version, NSString *Key) { static std::mutex g_sdk_path_mutex; std::lock_guard guard(g_sdk_path_mutex); - std::string &path = g_sdk_path[sdk.GetString()]; - if (path.empty()) - path = GetXcodeSDK(sdk); - return path; + auto it = g_sdk_path.find(sdk.GetString()); + if (it != g_sdk_path.end()) + return it->second; + auto it_new = g_sdk_path.insert({sdk.GetString(), GetXcodeSDK(sdk)}); + return it_new.first->second; } diff --git a/lldb/test/API/commands/command/script/TestCommandScript.py b/lldb/test/API/commands/command/script/TestCommandScript.py index 6663c36414526a..caf97ea8db9790 100644 --- a/lldb/test/API/commands/command/script/TestCommandScript.py +++ b/lldb/test/API/commands/command/script/TestCommandScript.py @@ -14,7 +14,7 @@ class CmdPythonTestCase(TestBase): mydir = TestBase.compute_mydir(__file__) NO_DEBUG_INFO_TESTCASE = True - @skipIfReproducer # Unexpected packet during replay + @skipIfReproducer # FIXME: Unexpected packet during (active) replay def test(self): self.build() self.pycmd_tests() diff --git a/lldb/test/API/commands/expression/issue_11588/Test11588.py b/lldb/test/API/commands/expression/issue_11588/Test11588.py index 8ed7797d5fffe2..eb5b86e96363d4 100644 --- a/lldb/test/API/commands/expression/issue_11588/Test11588.py +++ b/lldb/test/API/commands/expression/issue_11588/Test11588.py @@ -17,7 +17,7 @@ class Issue11581TestCase(TestBase): mydir = TestBase.compute_mydir(__file__) @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr24778") - @skipIfReproducer # Unexpected packet 
during replay + @skipIfReproducer # FIXME: Unexpected packet during (active) replay def test_11581_commands(self): # This is the function to remove the custom commands in order to have a # clean slate for the next test case. diff --git a/lldb/test/API/commands/expression/no-deadlock/TestExprDoesntBlock.py b/lldb/test/API/commands/expression/no-deadlock/TestExprDoesntBlock.py index d7d963390b051e..3423ec6e6ab9b0 100644 --- a/lldb/test/API/commands/expression/no-deadlock/TestExprDoesntBlock.py +++ b/lldb/test/API/commands/expression/no-deadlock/TestExprDoesntBlock.py @@ -17,6 +17,7 @@ class ExprDoesntDeadlockTestCase(TestBase): @expectedFailureAll(oslist=['freebsd'], bugnumber='llvm.org/pr17946') @add_test_categories(["basic_process"]) + @skipIfReproducer # Timeouts are not currently modeled. def test_with_run_command(self): """Test that expr will time out and allow other threads to run if it blocks.""" self.build() diff --git a/lldb/test/API/commands/expression/timeout/TestCallWithTimeout.py b/lldb/test/API/commands/expression/timeout/TestCallWithTimeout.py index 42e28a5a440a87..36ed7ce26de13e 100644 --- a/lldb/test/API/commands/expression/timeout/TestCallWithTimeout.py +++ b/lldb/test/API/commands/expression/timeout/TestCallWithTimeout.py @@ -26,6 +26,7 @@ def setUp(self): oslist=[ "windows"], bugnumber="llvm.org/pr21765") + @skipIfReproducer # Timeouts are not currently modeled. 
def test(self): """Test calling std::String member function.""" self.build() diff --git a/lldb/test/API/commands/expression/unwind_expression/TestUnwindExpression.py b/lldb/test/API/commands/expression/unwind_expression/TestUnwindExpression.py index de883f47f935df..3839f7d89235a8 100644 --- a/lldb/test/API/commands/expression/unwind_expression/TestUnwindExpression.py +++ b/lldb/test/API/commands/expression/unwind_expression/TestUnwindExpression.py @@ -53,6 +53,7 @@ def test_conditional_bktp(self): @add_test_categories(['pyapi']) @expectedFlakeyNetBSD + @skipIfReproducer # FIXME: Unexpected packet during (passive) replay def test_unwind_expression(self): """Test unwinding from an expression.""" self.build_and_run_to_bkpt() diff --git a/lldb/test/API/commands/process/attach-resume/TestAttachResume.py b/lldb/test/API/commands/process/attach-resume/TestAttachResume.py index ebb4345aca911e..48a281e096a93d 100644 --- a/lldb/test/API/commands/process/attach-resume/TestAttachResume.py +++ b/lldb/test/API/commands/process/attach-resume/TestAttachResume.py @@ -21,7 +21,7 @@ class AttachResumeTestCase(TestBase): @expectedFailureAll(oslist=['freebsd'], bugnumber='llvm.org/pr19310') @expectedFailureNetBSD @skipIfWindows # llvm.org/pr24778, llvm.org/pr21753 - @skipIfReproducer # Unexpected packet during replay + @skipIfReproducer # FIXME: Unexpected packet during (active) replay def test_attach_continue_interrupt_detach(self): """Test attach/continue/interrupt/detach""" self.build() diff --git a/lldb/test/API/commands/process/attach/TestProcessAttach.py b/lldb/test/API/commands/process/attach/TestProcessAttach.py index 792a8cee61f99d..f9b273309956c8 100644 --- a/lldb/test/API/commands/process/attach/TestProcessAttach.py +++ b/lldb/test/API/commands/process/attach/TestProcessAttach.py @@ -39,7 +39,7 @@ def test_attach_to_process_by_id(self): self.assertTrue(process, PROCESS_IS_VALID) @expectedFailureNetBSD - @skipIfReproducer # Unexpected packet during replay + @skipIfReproducer 
# FIXME: Unexpected packet during (active) replay def test_attach_to_process_from_different_dir_by_id(self): """Test attach by process id""" newdir = self.getBuildArtifact("newdir") diff --git a/lldb/test/API/functionalities/breakpoint/scripted_bkpt/TestScriptedResolver.py b/lldb/test/API/functionalities/breakpoint/scripted_bkpt/TestScriptedResolver.py index b08dfc78cea3a9..f4bbde755e6904 100644 --- a/lldb/test/API/functionalities/breakpoint/scripted_bkpt/TestScriptedResolver.py +++ b/lldb/test/API/functionalities/breakpoint/scripted_bkpt/TestScriptedResolver.py @@ -16,7 +16,7 @@ class TestScriptedResolver(TestBase): NO_DEBUG_INFO_TESTCASE = True @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr24528") - @skipIfReproducer # Unexpected packet during replay + @skipIfReproducer # FIXME: Unexpected packet during (active) replay def test_scripted_resolver(self): """Use a scripted resolver to set a by symbol name breakpoint""" self.build() diff --git a/lldb/test/API/functionalities/conditional_break/TestConditionalBreak.py b/lldb/test/API/functionalities/conditional_break/TestConditionalBreak.py index c1184d22cf1539..619d8c9f239409 100644 --- a/lldb/test/API/functionalities/conditional_break/TestConditionalBreak.py +++ b/lldb/test/API/functionalities/conditional_break/TestConditionalBreak.py @@ -26,7 +26,7 @@ def test_with_python(self): self.build() self.do_conditional_break() - @skipIfReproducer # Unexpected packet during replay + @skipIfReproducer # FIXME: Unexpected packet during (active) replay def test_with_command(self): """Simulate a user using lldb commands to break on c() if called from a().""" self.build() diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteClient.py b/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteClient.py index 053183b5b5b743..54f1e8a220abf8 100644 --- a/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteClient.py +++ 
b/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteClient.py @@ -18,7 +18,7 @@ def test_connect(self): process = self.connect(target) self.assertPacketLogContains(["qProcessInfo", "qfThreadInfo"]) - @skipIfReproducer # Unexpected packet during replay + @skipIfReproducer # FIXME: Unexpected packet during (active) replay def test_attach_fail(self): error_msg = "mock-error-msg" diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestRestartBug.py b/lldb/test/API/functionalities/gdb_remote_client/TestRestartBug.py index 142861a37dff20..f66f58379890d5 100644 --- a/lldb/test/API/functionalities/gdb_remote_client/TestRestartBug.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestRestartBug.py @@ -8,6 +8,7 @@ class TestRestartBug(GDBRemoteTestBase): @expectedFailureAll(bugnumber="llvm.org/pr24530") + @skipIfReproducer # FIXME: Unexpected packet during (passive) replay def test(self): """ Test auto-continue behavior when a process is interrupted to deliver diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestWriteMemory.py b/lldb/test/API/functionalities/gdb_remote_client/TestWriteMemory.py index 73bd292463f0f4..83fa197c2fe3e7 100644 --- a/lldb/test/API/functionalities/gdb_remote_client/TestWriteMemory.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestWriteMemory.py @@ -6,6 +6,7 @@ class TestWriteMemory(GDBRemoteTestBase): + @skipIfReproducer # SBProcess::WriteMemory is not instrumented. 
def test(self): class MyResponder(MockGDBServerResponder): diff --git a/lldb/test/API/functionalities/load_unload/TestLoadUnload.py b/lldb/test/API/functionalities/load_unload/TestLoadUnload.py index 853c0b2cea201e..538f7b1734ba11 100644 --- a/lldb/test/API/functionalities/load_unload/TestLoadUnload.py +++ b/lldb/test/API/functionalities/load_unload/TestLoadUnload.py @@ -223,6 +223,7 @@ def test_lldb_process_load_and_unload_commands_with_svr4(self): self.setSvr4Support(True) self.run_lldb_process_load_and_unload_commands() + @skipIfReproducer # FIXME: Unexpected packet during (passive) replay def run_lldb_process_load_and_unload_commands(self): """Test that lldb process load/unload command work correctly.""" self.copy_shlibs_to_remote() diff --git a/lldb/test/API/functionalities/load_using_paths/TestLoadUsingPaths.py b/lldb/test/API/functionalities/load_using_paths/TestLoadUsingPaths.py index a7d5f07a097669..9e10bd3ce833d6 100644 --- a/lldb/test/API/functionalities/load_using_paths/TestLoadUsingPaths.py +++ b/lldb/test/API/functionalities/load_using_paths/TestLoadUsingPaths.py @@ -41,6 +41,7 @@ def setUp(self): @skipIfWindows # Windows doesn't have dlopen and friends, dynamic libraries work differently @expectedFlakeyNetBSD @expectedFailureAll(oslist=["linux"], archs=['arm'], bugnumber="llvm.org/pr45894") + @skipIfReproducer # FIXME: Unexpected packet during (passive) replay def test_load_using_paths(self): """Test that we can load a module by providing a set of search paths.""" if self.platformIsDarwin(): diff --git a/lldb/test/API/functionalities/postmortem/minidump-new/TestMiniDumpNew.py b/lldb/test/API/functionalities/postmortem/minidump-new/TestMiniDumpNew.py index 3e1abc3353c326..012f9b67d9e33e 100644 --- a/lldb/test/API/functionalities/postmortem/minidump-new/TestMiniDumpNew.py +++ b/lldb/test/API/functionalities/postmortem/minidump-new/TestMiniDumpNew.py @@ -344,6 +344,7 @@ def test_deeper_stack_in_minidump(self): "linux-x86_64_not_crashed.dmp", 
self._linux_x86_64_not_crashed_pid) + @skipIfReproducer # VFS is a snapshot. def do_change_pid_in_minidump(self, core, newcore, offset, oldpid, newpid): """ This assumes that the minidump is breakpad generated on Linux - meaning that the PID in the file will be an ascii string part of diff --git a/lldb/test/API/functionalities/process_group/TestChangeProcessGroup.py b/lldb/test/API/functionalities/process_group/TestChangeProcessGroup.py index 93597b4edae378..124d13ed97a415 100644 --- a/lldb/test/API/functionalities/process_group/TestChangeProcessGroup.py +++ b/lldb/test/API/functionalities/process_group/TestChangeProcessGroup.py @@ -24,6 +24,7 @@ def setUp(self): @skipIfWindows # setpgid call does not exist on Windows @expectedFailureAndroid("http://llvm.org/pr23762", api_levels=[16]) @expectedFailureNetBSD + @skipIfReproducer # File synchronization is not supported during replay. def test_setpgid(self): self.build() exe = self.getBuildArtifact("a.out") diff --git a/lldb/test/API/functionalities/show_location/TestShowLocationDwarf5.py b/lldb/test/API/functionalities/show_location/TestShowLocationDwarf5.py index 1d4bc6f1345003..76d24d5d4e521f 100644 --- a/lldb/test/API/functionalities/show_location/TestShowLocationDwarf5.py +++ b/lldb/test/API/functionalities/show_location/TestShowLocationDwarf5.py @@ -14,17 +14,9 @@ class TestTargetSourceMap(TestBase): def test_source_map(self): # Set the target soure map to map "./" to the current test directory. yaml_path = os.path.join(self.getSourceDir(), "a.yaml") - yaml_base, ext = os.path.splitext(yaml_path) - obj_path = self.getBuildArtifact(yaml_base) + obj_path = self.getBuildArtifact('a.out') self.yaml2obj(yaml_path, obj_path) - def cleanup(): - if os.path.exists(obj_path): - os.unlink(obj_path) - - # Execute the cleanup function during test case tear down. 
- self.addTearDownHook(cleanup) - # Create a target with the object file we just created from YAML target = self.dbg.CreateTarget(obj_path) diff --git a/lldb/test/API/functionalities/signal/TestSendSignal.py b/lldb/test/API/functionalities/signal/TestSendSignal.py index 84c41d7def6492..d06322794a6366 100644 --- a/lldb/test/API/functionalities/signal/TestSendSignal.py +++ b/lldb/test/API/functionalities/signal/TestSendSignal.py @@ -23,7 +23,7 @@ def setUp(self): bugnumber="llvm.org/pr23318: does not report running state") @expectedFailureNetBSD(bugnumber='llvm.org/pr43959') @skipIfWindows # Windows does not support signals - @skipIfReproducer # Unexpected packet during replay + @skipIfReproducer # FIXME: Unexpected packet during (active) replay def test_with_run_command(self): """Test that lldb command 'process signal SIGUSR1' sends a signal to the inferior process.""" self.build() diff --git a/lldb/test/API/functionalities/step_scripted/TestStepScripted.py b/lldb/test/API/functionalities/step_scripted/TestStepScripted.py index 9cc63f46e80693..1e87541960c809 100644 --- a/lldb/test/API/functionalities/step_scripted/TestStepScripted.py +++ b/lldb/test/API/functionalities/step_scripted/TestStepScripted.py @@ -18,14 +18,14 @@ def setUp(self): self.main_source_file = lldb.SBFileSpec("main.c") self.runCmd("command script import Steps.py") - @skipIfReproducer # Unexpected packet during replay + @skipIfReproducer # FIXME: Unexpected packet during (active) replay def test_standard_step_out(self): """Tests stepping with the scripted thread plan laying over a standard thread plan for stepping out.""" self.build() self.step_out_with_scripted_plan("Steps.StepOut") - @skipIfReproducer # Unexpected packet during replay + @skipIfReproducer # FIXME: Unexpected packet during (active) replay def test_scripted_step_out(self): """Tests stepping with the scripted thread plan laying over an another scripted thread plan for stepping out.""" @@ -65,12 +65,12 @@ def 
test_misspelled_plan_name(self): # Make sure we didn't let the process run: self.assertEqual(stop_id, process.GetStopID(), "Process didn't run") - @skipIfReproducer # Unexpected packet during replay + @skipIfReproducer # FIXME: Unexpected packet during (active) replay def test_checking_variable(self): """Test that we can call SBValue API's from a scripted thread plan - using SBAPI's to step""" self.do_test_checking_variable(False) - @skipIfReproducer # Unexpected packet during replay + @skipIfReproducer # FIXME: Unexpected packet during (active) replay def test_checking_variable_cli(self): """Test that we can call SBValue API's from a scripted thread plan - using cli to step""" self.do_test_checking_variable(True) diff --git a/lldb/test/API/functionalities/thread/exit_during_expression/TestExitDuringExpression.py b/lldb/test/API/functionalities/thread/exit_during_expression/TestExitDuringExpression.py index bfdfdf53cdb15c..260fe596a39f28 100644 --- a/lldb/test/API/functionalities/thread/exit_during_expression/TestExitDuringExpression.py +++ b/lldb/test/API/functionalities/thread/exit_during_expression/TestExitDuringExpression.py @@ -33,12 +33,13 @@ def test_exit_after_one_thread_unwind(self): def test_exit_after_one_thread_no_unwind(self): """Test the case where we exit within the one thread timeout""" self.exiting_expression_test(False, False) - + def setUp(self): TestBase.setUp(self) self.main_source_file = lldb.SBFileSpec("main.c") self.build() - + + @skipIfReproducer # Timeouts are not currently modeled. def exiting_expression_test(self, before_one_thread_timeout , unwind): """function_to_call sleeps for g_timeout microseconds, then calls pthread_exit. This test calls function_to_call with an overall timeout of 500 @@ -46,7 +47,7 @@ def exiting_expression_test(self, before_one_thread_timeout , unwind): It also sets unwind_on_exit for the call to the unwind passed in. This allows you to have the thread exit either before the one thread timeout is passed. 
""" - + (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint(self, "Break here and cause the thread to exit", self.main_source_file) @@ -59,15 +60,15 @@ def exiting_expression_test(self, before_one_thread_timeout , unwind): var_options.SetIncludeArguments(False) var_options.SetIncludeLocals(False) var_options.SetIncludeStatics(True) - + value_list = frame.GetVariables(var_options) g_timeout = value_list.GetFirstValueByName("g_timeout") self.assertTrue(g_timeout.IsValid(), "Found g_timeout") - + error = lldb.SBError() timeout_value = g_timeout.GetValueAsUnsigned(error) self.assertTrue(error.Success(), "Couldn't get timeout value: %s"%(error.GetCString())) - + one_thread_timeout = 0 if (before_one_thread_timeout): one_thread_timeout = timeout_value * 2 @@ -78,7 +79,7 @@ def exiting_expression_test(self, before_one_thread_timeout , unwind): options.SetUnwindOnError(unwind) options.SetOneThreadTimeoutInMicroSeconds(one_thread_timeout) options.SetTimeoutInMicroSeconds(4 * timeout_value) - + result = frame.EvaluateExpression("function_to_call()", options) # Make sure the thread actually exited: @@ -103,4 +104,4 @@ def exiting_expression_test(self, before_one_thread_timeout , unwind): ret_val_value = ret_val.GetValueAsSigned(error) self.assertTrue(error.Success(), "Got ret_val's value") self.assertEqual(ret_val_value, 10, "We put the right value in ret_val") - + diff --git a/lldb/test/API/lang/cpp/thread_local/TestThreadLocal.py b/lldb/test/API/lang/cpp/thread_local/TestThreadLocal.py index e7cfa1ca14f278..b92ec90ff77d37 100644 --- a/lldb/test/API/lang/cpp/thread_local/TestThreadLocal.py +++ b/lldb/test/API/lang/cpp/thread_local/TestThreadLocal.py @@ -30,10 +30,25 @@ def test_thread_local(self): self.expect_expr("*tl_global_ptr", result_type="int", result_value="45") + # Create the filespec by which to locate our a.out module. + # + # - Use the absolute path to get the module for the current variant. + # - Use the relative path for reproducers. 
The modules are never + # orphaned because the SB objects are leaked intentionally. This + # causes LLDB to reuse the same module for every variant, because the + # UUID is the same for all the inferiors. FindModule below only + # compares paths and is oblivious to the fact that the UUIDs are the + # same. + if configuration.is_reproducer(): + filespec = lldb.SBFileSpec('a.out', False) + else: + filespec = lldb.SBFileSpec(exe, False) + # Now see if we emit the correct error when the TLS is not yet # initialized. Let's set a breakpoint on the first instruction # of main. - main_module = target.FindModule(lldb.SBFileSpec(exe)) + main_module = target.FindModule(filespec) + self.assertTrue(main_module, VALID_MODULE) main_address = main_module.FindSymbol("main").GetStartAddress() main_bkpt = target.BreakpointCreateBySBAddress(main_address) diff --git a/lldb/test/API/lang/objc/foundation/TestRuntimeTypes.py b/lldb/test/API/lang/objc/foundation/TestRuntimeTypes.py index 7ddaf63f34505b..7254f8ec3c6b8f 100644 --- a/lldb/test/API/lang/objc/foundation/TestRuntimeTypes.py +++ b/lldb/test/API/lang/objc/foundation/TestRuntimeTypes.py @@ -19,7 +19,7 @@ class RuntimeTypesTestCase(TestBase): oslist=["macosx"], debug_info="gmodules", bugnumber="llvm.org/pr27862") - @skipIfReproducer # Unexpected packet during replay + @skipIfReproducer # FIXME: Unexpected packet during (active) replay def test_break(self): """Test setting objc breakpoints using '_regexp-break' and 'breakpoint set'.""" if self.getArchitecture() != 'x86_64': diff --git a/lldb/test/API/lang/objc/hidden-ivars/TestHiddenIvars.py b/lldb/test/API/lang/objc/hidden-ivars/TestHiddenIvars.py index 5930ffdc958aae..cc3922ccf9f49e 100644 --- a/lldb/test/API/lang/objc/hidden-ivars/TestHiddenIvars.py +++ b/lldb/test/API/lang/objc/hidden-ivars/TestHiddenIvars.py @@ -30,6 +30,7 @@ def setUp(self): @skipIf( debug_info=no_match("dsym"), bugnumber="This test requires a stripped binary and a dSYM") + @skipIfReproducer # FIXME: Unexpected 
packet during (passive) replay def test_expr_stripped(self): if self.getArchitecture() == 'i386': self.skipTest("requires modern objc runtime") @@ -38,6 +39,7 @@ def test_expr_stripped(self): self.expr(True) @skipUnlessDarwin + @skipIfReproducer # FIXME: Unexpected packet during (passive) replay def test_expr(self): if self.getArchitecture() == 'i386': self.skipTest("requires modern objc runtime") diff --git a/lldb/test/API/lang/objc/modules/TestObjCModules.py b/lldb/test/API/lang/objc/modules/TestObjCModules.py index 30535409a30f0b..f6f9111f7641b2 100644 --- a/lldb/test/API/lang/objc/modules/TestObjCModules.py +++ b/lldb/test/API/lang/objc/modules/TestObjCModules.py @@ -22,7 +22,7 @@ def setUp(self): @skipUnlessDarwin @skipIf(macos_version=["<", "10.12"]) - @skipIfReproducer # Unexpected packet during replay + @skipIfReproducer # FIXME: Unexpected packet during (active) replay def test_expr(self): self.build() exe = self.getBuildArtifact("a.out") diff --git a/lldb/test/API/lang/objc/print-obj/TestPrintObj.py b/lldb/test/API/lang/objc/print-obj/TestPrintObj.py index b908079eefcd87..dc66e788990df6 100644 --- a/lldb/test/API/lang/objc/print-obj/TestPrintObj.py +++ b/lldb/test/API/lang/objc/print-obj/TestPrintObj.py @@ -24,7 +24,7 @@ def setUp(self): # Find the line numbers to break at. self.line = line_number(self.source, '// Set a breakpoint here.') - @skipIfReproducer # Unexpected packet during replay + @skipIfReproducer # FIXME: Unexpected packet during (active) replay def test_print_obj(self): """ Test "print object" where another thread blocks the print object from making progress. 
diff --git a/lldb/test/API/macosx/version_zero/TestGetVersionZeroVersion.py b/lldb/test/API/macosx/version_zero/TestGetVersionZeroVersion.py index f7e4da73dda65c..5f9772b8fb20e4 100644 --- a/lldb/test/API/macosx/version_zero/TestGetVersionZeroVersion.py +++ b/lldb/test/API/macosx/version_zero/TestGetVersionZeroVersion.py @@ -5,7 +5,7 @@ import lldb -from lldbsuite.test import decorators +from lldbsuite.test.decorators import * import lldbsuite.test.lldbutil as lldbutil from lldbsuite.test.lldbtest import * @@ -19,6 +19,7 @@ class TestGetVersionForZero(TestBase): # each debug info format. NO_DEBUG_INFO_TESTCASE = True + @skipIfReproducer # FIXME: Unexpected packet during (passive) replay def test_get_version_zero(self): """Read in a library with a version of 0.0.0. Test SBModule::GetVersion""" self.yaml2obj("libDylib.dylib.yaml", self.getBuildArtifact("libDylib.dylib")) diff --git a/lldb/test/API/python_api/hello_world/TestHelloWorld.py b/lldb/test/API/python_api/hello_world/TestHelloWorld.py index 2d38043bb45049..75a55ab1f44dc6 100644 --- a/lldb/test/API/python_api/hello_world/TestHelloWorld.py +++ b/lldb/test/API/python_api/hello_world/TestHelloWorld.py @@ -110,7 +110,7 @@ def test_with_attach_to_process_with_id_api(self): @skipIfiOSSimulator @skipIfAsan # FIXME: Hangs indefinitely. 
@expectedFailureNetBSD - @skipIfReproducer # Unexpected packet during replay + @skipIfReproducer # FIXME: Unexpected packet during (active) replay def test_with_attach_to_process_with_name_api(self): """Create target, spawn a process, and attach to it with process name.""" exe = '%s_%d'%(self.testMethodName, os.getpid()) diff --git a/lldb/test/API/python_api/symbol-context/TestSymbolContext.py b/lldb/test/API/python_api/symbol-context/TestSymbolContext.py index 0baf91e4a35132..cbe4eff0a5e3b5 100644 --- a/lldb/test/API/python_api/symbol-context/TestSymbolContext.py +++ b/lldb/test/API/python_api/symbol-context/TestSymbolContext.py @@ -23,6 +23,7 @@ def setUp(self): @add_test_categories(['pyapi']) @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr24778") + @skipIfReproducer # FIXME: Unexpected packet during (passive) replay def test(self): """Exercise SBSymbolContext API extensively.""" self.build() diff --git a/llvm/docs/CommandGuide/FileCheck.rst b/llvm/docs/CommandGuide/FileCheck.rst index d8a2e343026bfb..0512133f2e995c 100644 --- a/llvm/docs/CommandGuide/FileCheck.rst +++ b/llvm/docs/CommandGuide/FileCheck.rst @@ -660,8 +660,8 @@ The syntax to define a numeric variable is ``[[#%,:]]`` where: * ``%`` is an optional scanf-style matching format specifier to indicate what number format to match (e.g. hex number). Currently accepted - format specifiers are ``%u``, ``%x`` and ``%X``. If absent, the format - specifier defaults to ``%u``. + format specifiers are ``%u``, ``%d``, ``%x`` and ``%X``. If absent, the + format specifier defaults to ``%u``. * ```` is the name of the numeric variable to define to the matching value. @@ -692,10 +692,11 @@ The syntax of a numeric substitution is ``[[#%,]]`` where: * an expression followed by an operator and a numeric operand. A numeric operand is a previously defined numeric variable, or an integer - literal. The supported operators are ``+`` and ``-``. 
Spaces are accepted - before, after and between any of these elements. - There is currently no support for operator precendence, but parentheses can - be used to change the evaluation order. + literal; both have a 64-bit precision. The supported operators are ``+`` and + ``-``. Spaces are accepted before, after and between any of these elements. + Overflow and underflow are rejected. There is currently no support for + operator precedence, but parentheses can be used to change the evaluation + order. For example: diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 61a0085c6f8813..0e18dcc9f99e85 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -14871,6 +14871,169 @@ Examples: %r2 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) ; yields float:r2 = (a * b) + c +Hardware-Loop Intrinsics +------------------------ + +LLVM supports several intrinsics to mark a loop as a hardware-loop. They are +hints to the backend which are required to lower these intrinsics further to target +specific instructions, or revert the hardware-loop to a normal loop if target +specific restrictions are not met and a hardware-loop can't be generated. + +These intrinsics may be modified in the future and are not intended to be used +outside the backend. Thus, front-end and mid-level optimizations should not be +generating these intrinsics. + + +'``llvm.set.loop.iterations.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. + +:: + + declare void @llvm.set.loop.iterations.i32(i32) + declare void @llvm.set.loop.iterations.i64(i64) + +Overview: +""""""""" + +The '``llvm.set.loop.iterations.*``' intrinsics are used to specify the +hardware-loop trip count. They are placed in the loop preheader basic block and +are marked as ``IntrNoDuplicate`` to avoid optimizers duplicating these +instructions. 
+ +Arguments: +"""""""""" + +The integer operand is the loop trip count of the hardware-loop, and thus +not e.g. the loop back-edge taken count. + +Semantics: +"""""""""" + +The '``llvm.set.loop.iterations.*``' intrinsics do not perform any arithmetic +on their operand. It's a hint to the backend that can use this to set up the +hardware-loop count with a target specific instruction, usually a move of this +value to a special register or a hardware-loop instruction. + +'``llvm.test.set.loop.iterations.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. + +:: + + declare void @llvm.test.set.loop.iterations.i32(i32) + declare void @llvm.test.set.loop.iterations.i64(i64) + +Overview: +""""""""" + +The '``llvm.test.set.loop.iterations.*``' intrinsics are used to specify +the loop trip count, and also test that the given count is not zero, allowing +it to control entry to a while-loop. They are placed in the loop preheader's +predecessor basic block, and are marked as ``IntrNoDuplicate`` to avoid +optimizers duplicating these instructions. + +Arguments: +"""""""""" + +The integer operand is the loop trip count of the hardware-loop, and thus +not e.g. the loop back-edge taken count. + +Semantics: +"""""""""" + +The '``llvm.test.set.loop.iterations.*``' intrinsics do not perform any +arithmetic on their operand. It's a hint to the backend that can use this to +set up the hardware-loop count with a target specific instruction, usually a +move of this value to a special register or a hardware-loop instruction. + +'``llvm.loop.decrement.reg.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. 
+ +:: + + declare i32 @llvm.loop.decrement.reg.i32(i32, i32) + declare i64 @llvm.loop.decrement.reg.i64(i64, i64) + +Overview: +""""""""" + +The '``llvm.loop.decrement.reg.*``' intrinsics are used to lower the loop +iteration counter and return an updated value that will be used in the next +loop test check. + +Arguments: +"""""""""" + +Both arguments must have identical integer types. The first operand is the +loop iteration counter. The second operand is the maximum number of elements +processed in an iteration. + +Semantics: +"""""""""" + +The '``llvm.loop.decrement.reg.*``' intrinsics do an integer ``SUB`` of its +two operands, which is not allowed to wrap. They return the remaining number of +iterations still to be executed, and can be used together with a ``PHI``, +``ICMP`` and ``BR`` to control the number of loop iterations executed. Any +optimisations are allowed to treat it as a ``SUB``, and it is supported by +SCEV, so it's the backend's responsibility to handle cases where it may be +optimised. These intrinsics are marked as ``IntrNoDuplicate`` to avoid +optimizers duplicating these instructions. + + +'``llvm.loop.decrement.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. + +:: + + declare i1 @llvm.loop.decrement.i32(i32) + declare i1 @llvm.loop.decrement.i64(i64) + +Overview: +""""""""" + +The HardwareLoops pass allows the loop decrement value to be specified with an +option. It defaults to a loop decrement value of 1, but it can be an unsigned +integer value provided by this option. The '``llvm.loop.decrement.*``' +intrinsics decrement the loop iteration counter with this value, and return a +false predicate if the loop should exit, and true otherwise. +This is emitted if the loop counter is not updated via a ``PHI`` node, which +can also be controlled with an option. + +Arguments: +"""""""""" + +The integer argument is the loop decrement value used to decrement the loop +iteration counter. 
+ +Semantics: +"""""""""" + +The '``llvm.loop.decrement.*``' intrinsics do a ``SUB`` of the loop iteration +counter with the given loop decrement value, and return false if the loop +should exit, this ``SUB`` is not allowed to wrap. The result is a condition +that is used by the conditional branch controlling the loop. + + Experimental Vector Reduction Intrinsics ---------------------------------------- diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index b47222eefe8a32..a55d14c0cfa28e 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -70,6 +70,10 @@ Changes to the LLVM IR behavior was undocumented. To preserve optimizations, frontends may need to be updated to generate appropriate `align` attributes and metadata. +* The DIModule metadata is extended to contain file and line number + information. This information is used to represent Fortran modules debug + info at IR level. + Changes to building LLVM ------------------------ diff --git a/llvm/include/llvm/Analysis/ML/InlineFeaturesAnalysis.h b/llvm/include/llvm/Analysis/ML/InlineFeaturesAnalysis.h new file mode 100644 index 00000000000000..694cae34bc75e8 --- /dev/null +++ b/llvm/include/llvm/Analysis/ML/InlineFeaturesAnalysis.h @@ -0,0 +1,37 @@ +#ifndef LLVM_INLINEFEATURESANALYSIS_H_ +#define LLVM_INLINEFEATURESANALYSIS_H_ + +#include "llvm/IR/PassManager.h" + +namespace llvm { +class Function; + +class InlineFeaturesAnalysis + : public AnalysisInfoMixin { +public: + static AnalysisKey Key; + struct Result { + /// Number of basic blocks + int64_t BasicBlockCount = 0; + + /// Number of blocks reached from a conditional instruction, or that are + /// 'cases' of a SwitchInstr. + // FIXME: We may want to replace this with a more meaningful metric, like + // number of conditionally executed blocks: + // 'if (a) s();' would be counted here as 2 blocks, just like + // 'if (a) s(); else s2(); s3();' would. 
+ int64_t BlocksReachedFromConditionalInstruction = 0; + + /// Number of uses of this function, plus 1 if the function is callable + /// outside the module. + int64_t Uses = 0; + + /// Number of direct calls made from this function to other functions + /// defined in this module. + int64_t DirectCallsToDefinedFunctions = 0; + }; + Result run(const Function &F, FunctionAnalysisManager &FAM); +}; + +} // namespace llvm +#endif // LLVM_INLINEFEATURESANALYSIS_H_ \ No newline at end of file diff --git a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h index 8fbc9e8990b2e2..9fcceb93dbc40e 100644 --- a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h +++ b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h @@ -40,7 +40,6 @@ class ProfileSummaryInfo { private: Module &M; std::unique_ptr Summary; - bool computeSummary(); void computeThresholds(); // Count thresholds to answer isHotCount and isColdCount queries. Optional HotCountThreshold, ColdCountThreshold; @@ -53,33 +52,35 @@ class ProfileSummaryInfo { // percentile is above a large threshold. Optional HasLargeWorkingSetSize; // Compute the threshold for a given cutoff. - Optional computeThreshold(int PercentileCutoff); + Optional computeThreshold(int PercentileCutoff) const; // The map that caches the threshold values. The keys are the percentile // cutoff values and the values are the corresponding threshold values. - DenseMap ThresholdCache; + mutable DenseMap ThresholdCache; public: - ProfileSummaryInfo(Module &M) : M(M) {} - ProfileSummaryInfo(ProfileSummaryInfo &&Arg) - : M(Arg.M), Summary(std::move(Arg.Summary)) {} + ProfileSummaryInfo(Module &M) : M(M) { refresh(); } + ProfileSummaryInfo(ProfileSummaryInfo &&Arg) = default; + + /// If no summary is present, attempt to refresh. + void refresh(); /// Returns true if profile summary is available. 
- bool hasProfileSummary() { return computeSummary(); } + bool hasProfileSummary() const { return Summary != nullptr; } /// Returns true if module \c M has sample profile. - bool hasSampleProfile() { + bool hasSampleProfile() const { return hasProfileSummary() && Summary->getKind() == ProfileSummary::PSK_Sample; } /// Returns true if module \c M has instrumentation profile. - bool hasInstrumentationProfile() { + bool hasInstrumentationProfile() const { return hasProfileSummary() && Summary->getKind() == ProfileSummary::PSK_Instr; } /// Returns true if module \c M has context sensitive instrumentation profile. - bool hasCSInstrumentationProfile() { + bool hasCSInstrumentationProfile() const { return hasProfileSummary() && Summary->getKind() == ProfileSummary::PSK_CSInstr; } @@ -98,84 +99,86 @@ class ProfileSummaryInfo { /// Returns the profile count for \p CallInst. Optional getProfileCount(const CallBase &CallInst, BlockFrequencyInfo *BFI, - bool AllowSynthetic = false); + bool AllowSynthetic = false) const; /// Returns true if module \c M has partial-profile sample profile. - bool hasPartialSampleProfile(); + bool hasPartialSampleProfile() const; /// Returns true if the working set size of the code is considered huge. - bool hasHugeWorkingSetSize(); + bool hasHugeWorkingSetSize() const; /// Returns true if the working set size of the code is considered large. - bool hasLargeWorkingSetSize(); + bool hasLargeWorkingSetSize() const; /// Returns true if \p F has hot function entry. - bool isFunctionEntryHot(const Function *F); + bool isFunctionEntryHot(const Function *F) const; /// Returns true if \p F contains hot code. - bool isFunctionHotInCallGraph(const Function *F, BlockFrequencyInfo &BFI); + bool isFunctionHotInCallGraph(const Function *F, + BlockFrequencyInfo &BFI) const; /// Returns true if \p F has cold function entry. 
- bool isFunctionEntryCold(const Function *F); + bool isFunctionEntryCold(const Function *F) const; /// Returns true if \p F contains only cold code. - bool isFunctionColdInCallGraph(const Function *F, BlockFrequencyInfo &BFI); + bool isFunctionColdInCallGraph(const Function *F, + BlockFrequencyInfo &BFI) const; /// Returns true if the hotness of \p F is unknown. - bool isFunctionHotnessUnknown(const Function &F); + bool isFunctionHotnessUnknown(const Function &F) const; /// Returns true if \p F contains hot code with regard to a given hot /// percentile cutoff value. bool isFunctionHotInCallGraphNthPercentile(int PercentileCutoff, const Function *F, - BlockFrequencyInfo &BFI); + BlockFrequencyInfo &BFI) const; /// Returns true if \p F contains cold code with regard to a given cold /// percentile cutoff value. bool isFunctionColdInCallGraphNthPercentile(int PercentileCutoff, const Function *F, - BlockFrequencyInfo &BFI); + BlockFrequencyInfo &BFI) const; /// Returns true if count \p C is considered hot. - bool isHotCount(uint64_t C); + bool isHotCount(uint64_t C) const; /// Returns true if count \p C is considered cold. - bool isColdCount(uint64_t C); + bool isColdCount(uint64_t C) const; /// Returns true if count \p C is considered hot with regard to a given /// hot percentile cutoff value. - bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C); + bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C) const; /// Returns true if count \p C is considered cold with regard to a given /// cold percentile cutoff value. - bool isColdCountNthPercentile(int PercentileCutoff, uint64_t C); + bool isColdCountNthPercentile(int PercentileCutoff, uint64_t C) const; /// Returns true if BasicBlock \p BB is considered hot. - bool isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI); + bool isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) const; /// Returns true if BasicBlock \p BB is considered cold. 
- bool isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI); + bool isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) const; /// Returns true if BasicBlock \p BB is considered hot with regard to a given /// hot percentile cutoff value. - bool isHotBlockNthPercentile(int PercentileCutoff, - const BasicBlock *BB, BlockFrequencyInfo *BFI); + bool isHotBlockNthPercentile(int PercentileCutoff, const BasicBlock *BB, + BlockFrequencyInfo *BFI) const; /// Returns true if BasicBlock \p BB is considered cold with regard to a given /// cold percentile cutoff value. - bool isColdBlockNthPercentile(int PercentileCutoff, - const BasicBlock *BB, BlockFrequencyInfo *BFI); + bool isColdBlockNthPercentile(int PercentileCutoff, const BasicBlock *BB, + BlockFrequencyInfo *BFI) const; /// Returns true if the call site \p CB is considered hot. - bool isHotCallSite(const CallBase &CB, BlockFrequencyInfo *BFI); + bool isHotCallSite(const CallBase &CB, BlockFrequencyInfo *BFI) const; /// Returns true if call site \p CB is considered cold. - bool isColdCallSite(const CallBase &CB, BlockFrequencyInfo *BFI); + bool isColdCallSite(const CallBase &CB, BlockFrequencyInfo *BFI) const; /// Returns HotCountThreshold if set. Recompute HotCountThreshold /// if not set. - uint64_t getOrCompHotCountThreshold(); + uint64_t getOrCompHotCountThreshold() const; /// Returns ColdCountThreshold if set. Recompute HotCountThreshold /// if not set. - uint64_t getOrCompColdCountThreshold(); + uint64_t getOrCompColdCountThreshold() const; /// Returns HotCountThreshold if set. - uint64_t getHotCountThreshold() { + uint64_t getHotCountThreshold() const { return HotCountThreshold ? HotCountThreshold.getValue() : 0; } /// Returns ColdCountThreshold if set. - uint64_t getColdCountThreshold() { + uint64_t getColdCountThreshold() const { return ColdCountThreshold ? 
ColdCountThreshold.getValue() : 0; } private: - template - bool isFunctionHotOrColdInCallGraphNthPercentile(int PercentileCutoff, - const Function *F, - BlockFrequencyInfo &BFI); - template - bool isHotOrColdCountNthPercentile(int PercentileCutoff, uint64_t C); - template - bool isHotOrColdBlockNthPercentile(int PercentileCutoff, const BasicBlock *BB, - BlockFrequencyInfo *BFI); + template + bool isFunctionHotOrColdInCallGraphNthPercentile( + int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) const; + template + bool isHotOrColdCountNthPercentile(int PercentileCutoff, uint64_t C) const; + template + bool isHotOrColdBlockNthPercentile(int PercentileCutoff, + const BasicBlock *BB, + BlockFrequencyInfo *BFI) const; }; /// An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo. diff --git a/llvm/include/llvm/Analysis/StackSafetyAnalysis.h b/llvm/include/llvm/Analysis/StackSafetyAnalysis.h index 33a4b2c149c36f..df7ccac5b4b929 100644 --- a/llvm/include/llvm/Analysis/StackSafetyAnalysis.h +++ b/llvm/include/llvm/Analysis/StackSafetyAnalysis.h @@ -18,6 +18,8 @@ namespace llvm { +class AllocaInst; + /// Interface to access stack safety analysis results for single function. class StackSafetyInfo { public: @@ -38,6 +40,22 @@ class StackSafetyInfo { void print(raw_ostream &O, const GlobalValue &F) const; }; +class StackSafetyGlobalInfo { +public: + using GVToSSI = std::map; + +private: + GVToSSI SSGI; + +public: + StackSafetyGlobalInfo() = default; + StackSafetyGlobalInfo(GVToSSI SSGI) : SSGI(std::move(SSGI)) {} + + bool setMetadata(Module &M) const; + void print(raw_ostream &O) const; + void dump() const; +}; + /// StackSafetyInfo wrapper for the new pass manager. 
class StackSafetyAnalysis : public AnalysisInfoMixin { friend AnalysisInfoMixin; @@ -74,8 +92,6 @@ class StackSafetyInfoWrapperPass : public FunctionPass { bool runOnFunction(Function &F) override; }; -using StackSafetyGlobalInfo = std::map; - /// This pass performs the global (interprocedural) stack safety analysis (new /// pass manager). class StackSafetyGlobalAnalysis diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 1b0412bc47be42..1a17135b607881 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -311,6 +311,7 @@ enum { EM_RISCV = 243, // RISC-V EM_LANAI = 244, // Lanai 32-bit processor EM_BPF = 247, // Linux kernel bpf virtual machine + EM_VE = 251, // NEC SX-Aurora VE }; // Object file classes. @@ -764,6 +765,11 @@ enum { #include "ELFRelocs/MSP430.def" }; +// ELF Relocation type for VE. +enum { +#include "ELFRelocs/VE.def" +}; + #undef ELF_RELOC // Section header. diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/VE.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/VE.def new file mode 100644 index 00000000000000..9bfdbf1b0960fb --- /dev/null +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/VE.def @@ -0,0 +1,48 @@ + +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +// Relocation types defined in following documents. 
+// +// - System V Application Binary Interface - VE Architecture +// Processor Supplement +// - ELF Handling For Thread-Local Storage - VE Architecture +// Processor Supplement + +ELF_RELOC(R_VE_NONE, 0) +ELF_RELOC(R_VE_REFLONG, 1) +ELF_RELOC(R_VE_REFQUAD, 2) +ELF_RELOC(R_VE_SREL32, 3) +ELF_RELOC(R_VE_HI32, 4) +ELF_RELOC(R_VE_LO32, 5) +ELF_RELOC(R_VE_PC_HI32, 6) +ELF_RELOC(R_VE_PC_LO32, 7) +ELF_RELOC(R_VE_GOT32, 8) +ELF_RELOC(R_VE_GOT_HI32, 9) +ELF_RELOC(R_VE_GOT_LO32, 10) +ELF_RELOC(R_VE_GOTOFF32, 11) +ELF_RELOC(R_VE_GOTOFF_HI32, 12) +ELF_RELOC(R_VE_GOTOFF_LO32, 13) +ELF_RELOC(R_VE_PLT32, 14) +ELF_RELOC(R_VE_PLT_HI32, 15) +ELF_RELOC(R_VE_PLT_LO32, 16) +ELF_RELOC(R_VE_RELATIVE, 17) +ELF_RELOC(R_VE_GLOB_DAT, 18) +ELF_RELOC(R_VE_JUMP_SLOT, 19) +ELF_RELOC(R_VE_COPY, 20) +ELF_RELOC(R_VE_DTPMOD64, 22) +ELF_RELOC(R_VE_DTPOFF64, 23) +// ELF_RELOC(R_VE_TPOFF64, 24) +ELF_RELOC(R_VE_TLS_GD_HI32, 25) +ELF_RELOC(R_VE_TLS_GD_LO32, 26) +// ELF_RELOC(R_VE_TLS_LD_HI32, 27) +// ELF_RELOC(R_VE_TLS_LD_LO32, 28) +// ELF_RELOC(R_VE_DTPOFF32, 29) +// ELF_RELOC(R_VE_TLS_IE_HI32, 30) +// ELF_RELOC(R_VE_TLS_IE_LO32, 31) +ELF_RELOC(R_VE_TPOFF_HI32, 32) +ELF_RELOC(R_VE_TPOFF_LO32, 33) +// ELF_RELOC(R_VE_TPOFF32, 34) +ELF_RELOC(R_VE_CALL_HI32, 35) +ELF_RELOC(R_VE_CALL_LO32, 36) diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index f081a53263eff2..cf3afd8aeabc02 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -446,44 +446,67 @@ enum NodeType { /// Returns platform specific canonical encoding of a floating point number. FCANONICALIZE, - /// BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the - /// specified, possibly variable, elements. The number of elements is - /// required to be a power of two. 
The types of the operands must all be - /// the same and must match the vector element type, except that integer - /// types are allowed to be larger than the element type, in which case - /// the operands are implicitly truncated. + /// BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector + /// with the specified, possibly variable, elements. The number of elements + /// is required to be a power of two. The types of the operands must all be + /// the same and must match the vector element type, except that integer types + /// are allowed to be larger than the element type, in which case the operands + /// are implicitly truncated. BUILD_VECTOR, /// INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element - /// at IDX replaced with VAL. If the type of VAL is larger than the vector + /// at IDX replaced with VAL. If the type of VAL is larger than the vector /// element type then VAL is truncated before replacement. + /// + /// If VECTOR is a scalable vector, then IDX may be larger than the minimum + /// vector width. IDX is not first scaled by the runtime scaling factor of + /// VECTOR. INSERT_VECTOR_ELT, /// EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR - /// identified by the (potentially variable) element number IDX. If the - /// return type is an integer type larger than the element type of the - /// vector, the result is extended to the width of the return type. In - /// that case, the high bits are undefined. + /// identified by the (potentially variable) element number IDX. If the return + /// type is an integer type larger than the element type of the vector, the + /// result is extended to the width of the return type. In that case, the high + /// bits are undefined. + /// + /// If VECTOR is a scalable vector, then IDX may be larger than the minimum + /// vector width. IDX is not first scaled by the runtime scaling factor of + /// VECTOR. EXTRACT_VECTOR_ELT, /// CONCAT_VECTORS(VECTOR0, VECTOR1, ...) 
- Given a number of values of /// vector type with the same length and element type, this produces a /// concatenated vector result value, with length equal to the sum of the - /// lengths of the input vectors. + /// lengths of the input vectors. If VECTOR0 is a fixed-width vector, then + /// VECTOR1..VECTORN must all be fixed-width vectors. Similarly, if VECTOR0 + /// is a scalable vector, then VECTOR1..VECTORN must all be scalable vectors. CONCAT_VECTORS, - /// INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector - /// with VECTOR2 inserted into VECTOR1 at the constant element number - /// IDX, which must be a multiple of the VECTOR2 vector length. The - /// elements of VECTOR1 starting at IDX are overwritten with VECTOR2. - /// Elements IDX through vector_length(VECTOR2) must be valid VECTOR1 - /// indices. + /// INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 + /// inserted into VECTOR1. IDX represents the starting element number at which + /// VECTOR2 will be inserted. IDX must be a constant multiple of T's known + /// minimum vector length. Let the type of VECTOR2 be T, then if T is a + /// scalable vector, IDX is first scaled by the runtime scaling factor of T. + /// The elements of VECTOR1 starting at IDX are overwritten with VECTOR2. + /// Elements IDX through (IDX + num_elements(T) - 1) must be valid VECTOR1 + /// indices. If this condition cannot be determined statically but is false at + /// runtime, then the result vector is undefined. + /// + /// This operation supports inserting a fixed-width vector into a scalable + /// vector, but not the other way around. INSERT_SUBVECTOR, - /// EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (an - /// vector value) starting with the constant element number IDX, which - /// must be a multiple of the result vector length. Elements IDX through - /// vector_length(VECTOR) must be valid VECTOR indices. 
+ /// EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR. + /// Let the result type be T, then IDX represents the starting element number + /// from which a subvector of type T is extracted. IDX must be a constant + /// multiple of T's known minimum vector length. If T is a scalable vector, + /// IDX is first scaled by the runtime scaling factor of T. Elements IDX + /// through (IDX + num_elements(T) - 1) must be valid VECTOR indices. If this + /// condition cannot be determined statically but is false at runtime, then + /// the result vector is undefined. + /// + /// This operation supports extracting a fixed-width vector from a scalable + /// vector, but not the other way around. EXTRACT_SUBVECTOR, /// VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h index c3d737ca523932..d35fb171132344 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h @@ -14,7 +14,6 @@ #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" -#include "llvm/Support/WithColor.h" #include namespace llvm { diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h index a76e29132acccb..58365aa2b76458 100644 --- a/llvm/include/llvm/IR/Attributes.h +++ b/llvm/include/llvm/IR/Attributes.h @@ -396,6 +396,9 @@ class AttributeList { static AttributeList get(LLVMContext &C, ArrayRef Attrs); static AttributeList get(LLVMContext &C, unsigned Index, ArrayRef Kinds); + static AttributeList get(LLVMContext &C, unsigned Index, + ArrayRef Kinds, + ArrayRef Values); static AttributeList get(LLVMContext &C, unsigned Index, ArrayRef Kind); static AttributeList get(LLVMContext &C, unsigned Index, diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h index d63ca34c573b8c..d1c7d126b5a9e7 100644 --- 
a/llvm/include/llvm/IR/DIBuilder.h +++ b/llvm/include/llvm/IR/DIBuilder.h @@ -573,6 +573,8 @@ namespace llvm { /// implicitly uniques the values returned. DISubrange *getOrCreateSubrange(int64_t Lo, int64_t Count); DISubrange *getOrCreateSubrange(int64_t Lo, Metadata *CountNode); + DISubrange *getOrCreateSubrange(Metadata *Count, Metadata *LowerBound, + Metadata *UpperBound, Metadata *Stride); /// Create a new descriptor for the specified variable. /// \param Context Variable scope. diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h index 7dca44247c04f1..900a4b561cda9c 100644 --- a/llvm/include/llvm/IR/DebugInfoMetadata.h +++ b/llvm/include/llvm/IR/DebugInfoMetadata.h @@ -287,12 +287,8 @@ class DISubrange : public DINode { friend class LLVMContextImpl; friend class MDNode; - int64_t LowerBound; - - DISubrange(LLVMContext &C, StorageType Storage, Metadata *Node, - int64_t LowerBound, ArrayRef Ops) - : DINode(C, DISubrangeKind, Storage, dwarf::DW_TAG_subrange_type, Ops), - LowerBound(LowerBound) {} + DISubrange(LLVMContext &C, StorageType Storage, ArrayRef Ops) + : DINode(C, DISubrangeKind, Storage, dwarf::DW_TAG_subrange_type, Ops) {} ~DISubrange() = default; @@ -304,8 +300,14 @@ class DISubrange : public DINode { int64_t LowerBound, StorageType Storage, bool ShouldCreate = true); + static DISubrange *getImpl(LLVMContext &Context, Metadata *CountNode, + Metadata *LowerBound, Metadata *UpperBound, + Metadata *Stride, StorageType Storage, + bool ShouldCreate = true); + TempDISubrange cloneImpl() const { - return getTemporary(getContext(), getRawCountNode(), getLowerBound()); + return getTemporary(getContext(), getRawCountNode(), getRawLowerBound(), + getRawUpperBound(), getRawStride()); } public: @@ -315,25 +317,33 @@ class DISubrange : public DINode { DEFINE_MDNODE_GET(DISubrange, (Metadata *CountNode, int64_t LowerBound = 0), (CountNode, LowerBound)) - TempDISubrange clone() const { return cloneImpl(); } + 
DEFINE_MDNODE_GET(DISubrange, + (Metadata * CountNode, Metadata *LowerBound, + Metadata *UpperBound, Metadata *Stride), + (CountNode, LowerBound, UpperBound, Stride)) - int64_t getLowerBound() const { return LowerBound; } + TempDISubrange clone() const { return cloneImpl(); } Metadata *getRawCountNode() const { return getOperand(0).get(); } + Metadata *getRawLowerBound() const { return getOperand(1).get(); } + + Metadata *getRawUpperBound() const { return getOperand(2).get(); } + + Metadata *getRawStride() const { return getOperand(3).get(); } + typedef PointerUnion CountType; + typedef PointerUnion BoundType; - CountType getCount() const { - if (auto *MD = dyn_cast(getRawCountNode())) - return CountType(cast(MD->getValue())); + CountType getCount() const; - if (auto *DV = dyn_cast(getRawCountNode())) - return CountType(DV); + BoundType getLowerBound() const; - return CountType(); - } + BoundType getUpperBound() const; + + BoundType getStride() const; static bool classof(const Metadata *MD) { return MD->getMetadataID() == DISubrangeKind; diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index a2553cdeec6c24..78409df8f816a0 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -62,44 +62,57 @@ def Commutative : IntrinsicProperty; // Throws - This intrinsic can throw. def Throws : IntrinsicProperty; +// Attribute index needs to match `AttrIndex` defined `Attributes.h`. +class AttrIndex { + int Value = idx; +} +def FuncIndex : AttrIndex<-1>; +def RetIndex : AttrIndex<0>; +class ArgIndex : AttrIndex; + // NoCapture - The specified argument pointer is not captured by the intrinsic. -class NoCapture : IntrinsicProperty { - int ArgNo = argNo; +class NoCapture : IntrinsicProperty { + int ArgNo = idx.Value; } // NoAlias - The specified argument pointer is not aliasing other "noalias" pointer // arguments of the intrinsic wrt. the intrinsic scope. 
-class NoAlias : IntrinsicProperty { - int ArgNo = argNo; +class NoAlias : IntrinsicProperty { + int ArgNo = idx.Value; +} + +class Align : IntrinsicProperty { + int ArgNo = idx.Value; + int Align = align; } // Returned - The specified argument is always the return value of the // intrinsic. -class Returned : IntrinsicProperty { - int ArgNo = argNo; +class Returned : IntrinsicProperty { + int ArgNo = idx.Value; } // ImmArg - The specified argument must be an immediate. -class ImmArg : IntrinsicProperty { - int ArgNo = argNo; +class ImmArg : IntrinsicProperty { + int ArgNo = idx.Value; } // ReadOnly - The specified argument pointer is not written to through the // pointer by the intrinsic. -class ReadOnly : IntrinsicProperty { - int ArgNo = argNo; +class ReadOnly : IntrinsicProperty { + int ArgNo = idx.Value; } // WriteOnly - The intrinsic does not read memory through the specified // argument pointer. -class WriteOnly : IntrinsicProperty { - int ArgNo = argNo; +class WriteOnly : IntrinsicProperty { + int ArgNo = idx.Value; } // ReadNone - The specified argument pointer is not dereferenced by the // intrinsic. 
-class ReadNone : IntrinsicProperty { - int ArgNo = argNo; +class ReadNone : IntrinsicProperty { + int ArgNo = idx.Value; } def IntrNoReturn : IntrinsicProperty; @@ -356,7 +369,8 @@ def int_gcread : Intrinsic<[llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>; def int_gcwrite : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_ptrptr_ty], - [IntrArgMemOnly, NoCapture<1>, NoCapture<2>]>; + [IntrArgMemOnly, NoCapture>, + NoCapture>]>; //===------------------- ObjC ARC runtime Intrinsics --------------------===// // @@ -432,9 +446,11 @@ def int_objc_arc_annotation_bottomup_bbend : Intrinsic<[], //===--------------------- Code Generator Intrinsics ----------------------===// // -def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>; +def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], + [IntrNoMem, ImmArg>]>; def int_addressofreturnaddress : Intrinsic<[llvm_anyptr_ty], [], [IntrNoMem]>; -def int_frameaddress : Intrinsic<[llvm_anyptr_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>; +def int_frameaddress : Intrinsic<[llvm_anyptr_ty], [llvm_i32_ty], + [IntrNoMem, ImmArg>]>; def int_sponentry : Intrinsic<[llvm_anyptr_ty], [], [IntrNoMem]>; def int_read_register : Intrinsic<[llvm_anyint_ty], [llvm_metadata_ty], [IntrReadMem], "llvm.read_register">; @@ -452,7 +468,7 @@ def int_localescape : Intrinsic<[], [llvm_vararg_ty]>; // to an escaped allocation indicated by the index. def int_localrecover : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; // Given the frame pointer passed into an SEH filter function, returns a // pointer to the local variable area suitable for use with llvm.localrecover. @@ -478,8 +494,9 @@ def int_thread_pointer : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>, // memory while not impeding optimization. 
def int_prefetch : Intrinsic<[], [ llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty ], - [ IntrInaccessibleMemOrArgMemOnly, IntrWillReturn, ReadOnly<0>, NoCapture<0>, - ImmArg<1>, ImmArg<2>]>; + [IntrInaccessibleMemOrArgMemOnly, IntrWillReturn, + ReadOnly>, NoCapture>, + ImmArg>, ImmArg>]>; def int_pcmarker : Intrinsic<[], [llvm_i32_ty]>; def int_readcyclecounter : Intrinsic<[llvm_i64_ty]>; @@ -520,10 +537,13 @@ def int_call_preallocated_arg : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_i3 // def int_memcpy : Intrinsic<[], - [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, - llvm_i1_ty], - [IntrArgMemOnly, IntrWillReturn, NoCapture<0>, NoCapture<1>, - NoAlias<0>, NoAlias<1>, WriteOnly<0>, ReadOnly<1>, ImmArg<3>]>; + [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, + llvm_i1_ty], + [IntrArgMemOnly, IntrWillReturn, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, + WriteOnly>, ReadOnly>, + ImmArg>]>; // Memcpy semantic that is guaranteed to be inlined. // In particular this means that the generated code is not allowed to call any @@ -531,23 +551,25 @@ def int_memcpy : Intrinsic<[], // The third argument (specifying the size) must be a constant. 
def int_memcpy_inline : Intrinsic<[], - [ llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty ], - [ IntrArgMemOnly, IntrWillReturn, - NoCapture<0>, NoCapture<1>, - NoAlias<0>, NoAlias<1>, - WriteOnly<0>, ReadOnly<1>, - ImmArg<2>, ImmArg<3> ]>; + [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty], + [IntrArgMemOnly, IntrWillReturn, + NoCapture>, NoCapture>, + NoAlias>, NoAlias>, + WriteOnly>, ReadOnly>, + ImmArg>, ImmArg>]>; def int_memmove : Intrinsic<[], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty], - [IntrArgMemOnly, IntrWillReturn, NoCapture<0>, NoCapture<1>, - ReadOnly<1>, ImmArg<3>]>; + [IntrArgMemOnly, IntrWillReturn, + NoCapture>, NoCapture>, + ReadOnly>, ImmArg>]>; def int_memset : Intrinsic<[], [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i1_ty], - [IntrWriteMem, IntrArgMemOnly, IntrWillReturn, NoCapture<0>, - WriteOnly<0>, ImmArg<3>]>; + [IntrWriteMem, IntrArgMemOnly, IntrWillReturn, + NoCapture>, WriteOnly>, + ImmArg>]>; // FIXME: Add version of these floating point intrinsics which allow non-default // rounding modes and FP exception handling. 
@@ -614,7 +636,9 @@ def int_maximum : Intrinsic<[llvm_anyfloat_ty], def int_objectsize : Intrinsic<[llvm_anyint_ty], [llvm_anyptr_ty, llvm_i1_ty, llvm_i1_ty, llvm_i1_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<1>, ImmArg<2>, ImmArg<3>]>, + [IntrNoMem, IntrSpeculatable, IntrWillReturn, + ImmArg>, ImmArg>, + ImmArg>]>, GCCBuiltin<"__builtin_object_size">; //===--------------- Access to Floating Point Environment -----------------===// @@ -827,7 +851,8 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; } -let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<1>] in { +let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn, + ImmArg>] in { def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; } @@ -917,12 +942,12 @@ def int_codeview_annotation : Intrinsic<[], [llvm_metadata_ty], // def int_init_trampoline : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty], - [IntrArgMemOnly, NoCapture<0>]>, - GCCBuiltin<"__builtin_init_trampoline">; + [IntrArgMemOnly, NoCapture>]>, + GCCBuiltin<"__builtin_init_trampoline">; def int_adjust_trampoline : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>, - GCCBuiltin<"__builtin_adjust_trampoline">; + GCCBuiltin<"__builtin_adjust_trampoline">; //===------------------------ Overflow Intrinsics -------------------------===// // @@ -969,52 +994,64 @@ def int_usub_sat : Intrinsic<[llvm_anyint_ty], // def int_smul_fix : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative, ImmArg<2>]>; + [IntrNoMem, IntrSpeculatable, IntrWillReturn, + Commutative, ImmArg>]>; def int_umul_fix : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, 
Commutative, ImmArg<2>]>; + [IntrNoMem, IntrSpeculatable, IntrWillReturn, + Commutative, ImmArg>]>; def int_sdiv_fix : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_udiv_fix : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; //===------------------- Fixed Point Saturation Arithmetic Intrinsics ----------------===// // def int_smul_fix_sat : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative, ImmArg<2>]>; + [IntrNoMem, IntrSpeculatable, IntrWillReturn, + Commutative, ImmArg>]>; def int_umul_fix_sat : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative, ImmArg<2>]>; + [IntrNoMem, IntrSpeculatable, IntrWillReturn, + Commutative, ImmArg>]>; def int_sdiv_fix_sat : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_udiv_fix_sat : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; //===------------------------- Memory Use Markers -------------------------===// // def int_lifetime_start : Intrinsic<[], [llvm_i64_ty, llvm_anyptr_ty], - [IntrArgMemOnly, IntrWillReturn, NoCapture<1>, ImmArg<0>]>; + [IntrArgMemOnly, IntrWillReturn, + NoCapture>, + ImmArg>]>; def int_lifetime_end : Intrinsic<[], [llvm_i64_ty, llvm_anyptr_ty], - [IntrArgMemOnly, IntrWillReturn, NoCapture<1>, ImmArg<0>]>; + [IntrArgMemOnly, IntrWillReturn, + NoCapture>, + ImmArg>]>; def int_invariant_start : Intrinsic<[llvm_descriptor_ty], [llvm_i64_ty, llvm_anyptr_ty], - [IntrArgMemOnly, IntrWillReturn, NoCapture<1>, ImmArg<0>]>; + [IntrArgMemOnly, IntrWillReturn, + NoCapture>, + ImmArg>]>; def 
int_invariant_end : Intrinsic<[], [llvm_descriptor_ty, llvm_i64_ty, llvm_anyptr_ty], - [IntrArgMemOnly, IntrWillReturn, NoCapture<2>, ImmArg<1>]>; + [IntrArgMemOnly, IntrWillReturn, + NoCapture>, + ImmArg>]>; // launder.invariant.group can't be marked with 'readnone' (IntrNoMem), // because it would cause CSE of two barriers with the same argument. @@ -1061,13 +1098,17 @@ def int_experimental_gc_statepoint : Intrinsic<[llvm_token_ty], [llvm_i64_ty, llvm_i32_ty, llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_vararg_ty], - [Throws, ImmArg<0>, ImmArg<1>, ImmArg<3>, ImmArg<4>]>; + [Throws, ImmArg>, + ImmArg>, ImmArg>, + ImmArg>]>; def int_experimental_gc_result : Intrinsic<[llvm_any_ty], [llvm_token_ty], [IntrReadMem]>; def int_experimental_gc_relocate : Intrinsic<[llvm_any_ty], - [llvm_token_ty, llvm_i32_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<1>, ImmArg<2>]>; + [llvm_token_ty, llvm_i32_ty, + llvm_i32_ty], + [IntrReadMem, ImmArg>, + ImmArg>]>; //===------------------------ Coroutine Intrinsics ---------------===// // These are documented in docs/Coroutines.rst @@ -1077,7 +1118,8 @@ def int_experimental_gc_relocate : Intrinsic<[llvm_any_ty], def int_coro_id : Intrinsic<[llvm_token_ty], [llvm_i32_ty, llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty], [IntrArgMemOnly, IntrReadMem, - ReadNone<1>, ReadOnly<2>, NoCapture<2>]>; + ReadNone>, ReadOnly>, + NoCapture>]>; def int_coro_id_retcon : Intrinsic<[llvm_token_ty], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty], @@ -1088,11 +1130,12 @@ def int_coro_id_retcon_once : Intrinsic<[llvm_token_ty], []>; def int_coro_alloc : Intrinsic<[llvm_i1_ty], [llvm_token_ty], []>; def int_coro_begin : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_ptr_ty], - [WriteOnly<1>]>; + [WriteOnly>]>; def int_coro_free : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_ptr_ty], - [IntrReadMem, IntrArgMemOnly, ReadOnly<1>, - NoCapture<1>]>; + [IntrReadMem, IntrArgMemOnly, + ReadOnly>, + NoCapture>]>; def int_coro_end : 
Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_i1_ty], []>; def int_coro_frame : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; @@ -1110,23 +1153,26 @@ def int_coro_alloca_get : Intrinsic<[llvm_ptr_ty], [llvm_token_ty], []>; def int_coro_alloca_free : Intrinsic<[], [llvm_token_ty], []>; def int_coro_param : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_ptr_ty], - [IntrNoMem, ReadNone<0>, ReadNone<1>]>; + [IntrNoMem, ReadNone>, + ReadNone>]>; // Coroutine Manipulation Intrinsics. def int_coro_resume : Intrinsic<[], [llvm_ptr_ty], [Throws]>; def int_coro_destroy : Intrinsic<[], [llvm_ptr_ty], [Throws]>; def int_coro_done : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], - [IntrArgMemOnly, ReadOnly<0>, NoCapture<0>]>; + [IntrArgMemOnly, ReadOnly>, + NoCapture>]>; def int_coro_promise : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i1_ty], - [IntrNoMem, NoCapture<0>]>; + [IntrNoMem, NoCapture>]>; // Coroutine Lowering Intrinsics. Used internally by coroutine passes. def int_coro_subfn_addr : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i8_ty], - [IntrReadMem, IntrArgMemOnly, ReadOnly<0>, - NoCapture<0>]>; + [IntrReadMem, IntrArgMemOnly, + ReadOnly>, + NoCapture>]>; ///===-------------------------- Other Intrinsics --------------------------===// // @@ -1255,24 +1301,26 @@ def int_masked_store : Intrinsic<[], [llvm_anyvector_ty, LLVMAnyPointerType>, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [IntrArgMemOnly, IntrWillReturn, ImmArg<2>]>; + [IntrArgMemOnly, IntrWillReturn, ImmArg>]>; def int_masked_load : Intrinsic<[llvm_anyvector_ty], [LLVMAnyPointerType>, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>], - [IntrReadMem, IntrArgMemOnly, IntrWillReturn, ImmArg<1>]>; + [IntrReadMem, IntrArgMemOnly, IntrWillReturn, + ImmArg>]>; def int_masked_gather: Intrinsic<[llvm_anyvector_ty], [LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>], - [IntrReadMem, IntrWillReturn, ImmArg<1>]>; + 
[IntrReadMem, IntrWillReturn, + ImmArg>]>; def int_masked_scatter: Intrinsic<[], [llvm_anyvector_ty, LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [IntrWillReturn, ImmArg<2>]>; + [IntrWillReturn, ImmArg>]>; def int_masked_expandload: Intrinsic<[llvm_anyvector_ty], [LLVMPointerToElt<0>, @@ -1303,20 +1351,24 @@ def int_load_relative: Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty], [IntrReadMem, IntrArgMemOnly]>; def int_hwasan_check_memaccess : - Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], [IntrInaccessibleMemOnly, ImmArg<2>]>; + Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], + [IntrInaccessibleMemOnly, ImmArg>]>; def int_hwasan_check_memaccess_shortgranules : - Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], [IntrInaccessibleMemOnly, ImmArg<2>]>; + Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], + [IntrInaccessibleMemOnly, ImmArg>]>; // Xray intrinsics //===----------------------------------------------------------------------===// // Custom event logging for x-ray. // Takes a pointer to a string and the length of the string. def int_xray_customevent : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], - [NoCapture<0>, ReadOnly<0>, IntrWriteMem]>; + [IntrWriteMem, NoCapture>, + ReadOnly>]>; // Typed event logging for x-ray. // Takes a numeric type tag, a pointer to a string and the length of the string. 
def int_xray_typedevent : Intrinsic<[], [llvm_i16_ty, llvm_ptr_ty, llvm_i32_ty], - [NoCapture<1>, ReadOnly<1>, IntrWriteMem]>; + [IntrWriteMem, NoCapture>, + ReadOnly>]>; //===----------------------------------------------------------------------===// //===------ Memory intrinsics with element-wise atomicity guarantees ------===// @@ -1325,30 +1377,25 @@ def int_xray_typedevent : Intrinsic<[], [llvm_i16_ty, llvm_ptr_ty, llvm_i32_ty], // @llvm.memcpy.element.unordered.atomic.*(dest, src, length, elementsize) def int_memcpy_element_unordered_atomic : Intrinsic<[], - [ - llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i32_ty - ], - [ - IntrArgMemOnly, IntrWillReturn, NoCapture<0>, NoCapture<1>, WriteOnly<0>, - ReadOnly<1>, ImmArg<3> - ]>; + [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i32_ty], + [IntrArgMemOnly, IntrWillReturn, NoCapture>, + NoCapture>, WriteOnly>, + ReadOnly>, ImmArg>]>; // @llvm.memmove.element.unordered.atomic.*(dest, src, length, elementsize) def int_memmove_element_unordered_atomic : Intrinsic<[], - [ - llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i32_ty - ], - [ - IntrArgMemOnly, IntrWillReturn, NoCapture<0>, NoCapture<1>, WriteOnly<0>, - ReadOnly<1>, ImmArg<3> - ]>; + [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i32_ty], + [IntrArgMemOnly, IntrWillReturn, NoCapture>, + NoCapture>, WriteOnly>, + ReadOnly>, ImmArg>]>; // @llvm.memset.element.unordered.atomic.*(dest, value, length, elementsize) def int_memset_element_unordered_atomic - : Intrinsic<[], [ llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i32_ty ], - [ IntrWriteMem, IntrArgMemOnly, IntrWillReturn, NoCapture<0>, WriteOnly<0>, - ImmArg<3> ]>; + : Intrinsic<[], [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i32_ty], + [IntrWriteMem, IntrArgMemOnly, IntrWillReturn, + NoCapture>, WriteOnly>, + ImmArg>]>; //===------------------------ Reduction Intrinsics ------------------------===// // @@ -1390,7 +1437,8 @@ def int_matrix_transpose : 
Intrinsic<[llvm_anyvector_ty], llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, - IntrWillReturn, ImmArg<1>, ImmArg<2>]>; + IntrWillReturn, ImmArg>, + ImmArg>]>; def int_matrix_multiply : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, @@ -1399,8 +1447,9 @@ def int_matrix_multiply : Intrinsic<[llvm_anyvector_ty], llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, - IntrWillReturn, ImmArg<2>, ImmArg<3>, - ImmArg<4>]>; + IntrWillReturn, ImmArg>, + ImmArg>, + ImmArg>]>; def int_matrix_columnwise_load : Intrinsic<[llvm_anyvector_ty], [LLVMAnyPointerType>, @@ -1408,7 +1457,9 @@ def int_matrix_columnwise_load : Intrinsic<[llvm_anyvector_ty], llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly, IntrReadMem, - IntrWillReturn, ImmArg<2>, ImmArg<3>]>; + IntrWillReturn, + ImmArg>, + ImmArg>]>; def int_matrix_columnwise_store : Intrinsic<[], [llvm_anyvector_ty, @@ -1417,8 +1468,10 @@ def int_matrix_columnwise_store : Intrinsic<[], llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly, IntrWillReturn, - IntrWriteMem, WriteOnly<1>, - ImmArg<3>, ImmArg<4>]>; + IntrWriteMem, + WriteOnly>, + ImmArg>, + ImmArg>]>; //===---------- Intrinsics to control hardware supported loops ----------===// @@ -1452,22 +1505,26 @@ def int_loop_decrement_reg : //===----- Intrinsics that are used to provide predicate information -----===// def int_ssa_copy : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>], - [IntrNoMem, Returned<0>]>; + [IntrNoMem, Returned>]>; //===------- Intrinsics that are used to preserve debug information -------===// def int_preserve_array_access_index : Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>, ImmArg<2>]>; + [IntrNoMem, + ImmArg>, + ImmArg>]>; def int_preserve_union_access_index : Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, + ImmArg>]>; def int_preserve_struct_access_index : Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>, - 
ImmArg<2>]>; + [IntrNoMem, + ImmArg>, + ImmArg>]>; //===---------- Intrinsics to query properties of scalable vectors --------===// def int_vscale : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 384e3209f5f5ff..d00456123f519e 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -487,7 +487,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". [IntrReadMem, IntrArgMemOnly]>; class AdvSIMD_1Vec_Store_Lane_Intrinsic : Intrinsic<[], [llvm_anyvector_ty, llvm_i64_ty, llvm_anyptr_ty], - [IntrArgMemOnly, NoCapture<2>]>; + [IntrArgMemOnly, NoCapture>]>; class AdvSIMD_2Vec_Load_Intrinsic : Intrinsic<[LLVMMatchType<0>, llvm_anyvector_ty], @@ -501,11 +501,11 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". class AdvSIMD_2Vec_Store_Intrinsic : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, LLVMAnyPointerType>], - [IntrArgMemOnly, NoCapture<2>]>; + [IntrArgMemOnly, NoCapture>]>; class AdvSIMD_2Vec_Store_Lane_Intrinsic : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, llvm_i64_ty, llvm_anyptr_ty], - [IntrArgMemOnly, NoCapture<3>]>; + [IntrArgMemOnly, NoCapture>]>; class AdvSIMD_3Vec_Load_Intrinsic : Intrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty], @@ -519,12 +519,12 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". 
class AdvSIMD_3Vec_Store_Intrinsic : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMAnyPointerType>], - [IntrArgMemOnly, NoCapture<3>]>; + [IntrArgMemOnly, NoCapture>]>; class AdvSIMD_3Vec_Store_Lane_Intrinsic : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i64_ty, llvm_anyptr_ty], - [IntrArgMemOnly, NoCapture<4>]>; + [IntrArgMemOnly, NoCapture>]>; class AdvSIMD_4Vec_Load_Intrinsic : Intrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, @@ -542,12 +542,12 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMAnyPointerType>], - [IntrArgMemOnly, NoCapture<4>]>; + [IntrArgMemOnly, NoCapture>]>; class AdvSIMD_4Vec_Store_Lane_Intrinsic : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i64_ty, llvm_anyptr_ty], - [IntrArgMemOnly, NoCapture<5>]>; + [IntrArgMemOnly, NoCapture>]>; } // Memory ops @@ -744,20 +744,20 @@ def int_aarch64_irg_sp : Intrinsic<[llvm_ptr_ty], [llvm_i64_ty], // ADDG ptr1, baseptr, (ptr0 - baseptr), tag_offset // It is intended that ptr0 is an alloca address, and baseptr is the direct output of llvm.aarch64.irg.sp. def int_aarch64_tagp : Intrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_ptr_ty, llvm_i64_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; // Update allocation tags for the memory range to match the tag in the pointer argument. def int_aarch64_settag : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], - [IntrWriteMem, IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>; + [IntrWriteMem, IntrArgMemOnly, NoCapture>, WriteOnly>]>; // Update allocation tags for the memory range to match the tag in the pointer argument, // and set memory contents to zero. 
def int_aarch64_settag_zero : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], - [IntrWriteMem, IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>; + [IntrWriteMem, IntrArgMemOnly, NoCapture>, WriteOnly>]>; // Update allocation tags for 16-aligned, 16-sized memory region, and store a pair 8-byte values. def int_aarch64_stgp : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty], - [IntrWriteMem, IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>; + [IntrWriteMem, IntrArgMemOnly, NoCapture>, WriteOnly>]>; } // Transactional Memory Extension (TME) Intrinsics @@ -768,7 +768,7 @@ def int_aarch64_tstart : GCCBuiltin<"__builtin_arm_tstart">, def int_aarch64_tcommit : GCCBuiltin<"__builtin_arm_tcommit">, Intrinsic<[]>; def int_aarch64_tcancel : GCCBuiltin<"__builtin_arm_tcancel">, - Intrinsic<[], [llvm_i64_ty], [ImmArg<0>]>; + Intrinsic<[], [llvm_i64_ty], [ImmArg>]>; def int_aarch64_ttest : GCCBuiltin<"__builtin_arm_ttest">, Intrinsic<[llvm_i64_ty], [], @@ -800,26 +800,26 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". 
[llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMPointerToElt<0>], - [IntrArgMemOnly, NoCapture<2>]>; + [IntrArgMemOnly, NoCapture>]>; class AdvSIMD_2Vec_PredStore_Intrinsic : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMPointerTo<0>], - [IntrArgMemOnly, NoCapture<3>]>; + [IntrArgMemOnly, NoCapture>]>; class AdvSIMD_3Vec_PredStore_Intrinsic : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMPointerTo<0>], - [IntrArgMemOnly, NoCapture<4>]>; + [IntrArgMemOnly, NoCapture>]>; class AdvSIMD_4Vec_PredStore_Intrinsic : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMPointerTo<0>], - [IntrArgMemOnly, NoCapture<5>]>; + [IntrArgMemOnly, NoCapture>]>; class AdvSIMD_SVE_Index_Intrinsic : Intrinsic<[llvm_anyvector_ty], @@ -839,7 +839,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; class AdvSIMD_3VectorArgIndexed_Intrinsic : Intrinsic<[llvm_anyvector_ty], @@ -847,7 +847,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; class AdvSIMD_Pred1VectorArg_Intrinsic : Intrinsic<[llvm_anyvector_ty], @@ -895,7 +895,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>, ImmArg<2>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; class AdvSIMD_SVE_Saturating_N_Intrinsic : Intrinsic<[T], @@ -905,7 +905,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". 
class AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic : Intrinsic<[T], [T, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>, ImmArg<2>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; class AdvSIMD_SVE_CNT_Intrinsic : Intrinsic<[LLVMVectorOfBitcastsToInt<0>], @@ -926,7 +926,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; class AdvSIMD_SVE_ShiftWide_Intrinsic : Intrinsic<[llvm_anyvector_ty], @@ -946,7 +946,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; class AdvSIMD_SVE_CMLA_Intrinsic : Intrinsic<[llvm_anyvector_ty], @@ -955,7 +955,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, ImmArg<4>]>; + [IntrNoMem, ImmArg>]>; class AdvSIMD_SVE_CMLA_LANE_Intrinsic : Intrinsic<[llvm_anyvector_ty], @@ -964,7 +964,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>, ImmArg<4>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; class AdvSIMD_SVE_DUP_Intrinsic : Intrinsic<[llvm_anyvector_ty], @@ -1011,7 +1011,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". class AdvSIMD_SVE_PTRUE_Intrinsic : Intrinsic<[llvm_anyvector_ty], [llvm_i32_ty], - [IntrNoMem, ImmArg<0>]>; + [IntrNoMem, ImmArg>]>; class AdvSIMD_SVE_PUNPKHI_Intrinsic : Intrinsic<[LLVMHalfElementsVectorType<0>], @@ -1041,7 +1041,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". 
class AdvSIMD_SVE_CNTB_Intrinsic : Intrinsic<[llvm_i64_ty], [llvm_i32_ty], - [IntrNoMem, ImmArg<0>]>; + [IntrNoMem, ImmArg>]>; class AdvSIMD_SVE_CNTP_Intrinsic : Intrinsic<[llvm_i64_ty], @@ -1061,7 +1061,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". LLVMSubdivide4VectorType<0>, LLVMSubdivide4VectorType<0>, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; class AdvSIMD_SVE_PTEST_Intrinsic : Intrinsic<[llvm_i1_ty], @@ -1086,7 +1086,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". : Intrinsic<[llvm_anyvector_ty], [LLVMSubdivide2VectorType<0>, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; class SVE2_2VectorArg_Long_Intrinsic : Intrinsic<[llvm_anyvector_ty], @@ -1099,7 +1099,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; class SVE2_2VectorArg_Wide_Intrinsic : Intrinsic<[llvm_anyvector_ty], @@ -1127,7 +1127,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; class SVE2_1VectorArg_Narrowing_Intrinsic : Intrinsic<[LLVMSubdivide2VectorType<0>], @@ -1154,13 +1154,13 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". 
class SVE2_1VectorArg_Imm_Narrowing_Intrinsic : Intrinsic<[LLVMSubdivide2VectorType<0>], [llvm_anyvector_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; class SVE2_2VectorArg_Imm_Narrowing_Intrinsic : Intrinsic<[LLVMSubdivide2VectorType<0>], [LLVMSubdivide2VectorType<0>, llvm_anyvector_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; class SVE2_CONFLICT_DETECT_Intrinsic : Intrinsic<[llvm_anyvector_ty], @@ -1173,7 +1173,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; class AdvSIMD_SVE_CDOT_LANE_Intrinsic : Intrinsic<[llvm_anyvector_ty], @@ -1182,7 +1182,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". LLVMSubdivide4VectorType<0>, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>, ImmArg<4>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; // NOTE: There is no relationship between these intrinsics beyond an attempt // to reuse currently identical class definitions. @@ -1283,7 +1283,7 @@ class SVE_gather_prf_SV llvm_anyvector_ty, // Offsets llvm_i32_ty // Prfop ], - [IntrInaccessibleMemOrArgMemOnly, NoCapture<1>, ImmArg<3>]>; + [IntrInaccessibleMemOrArgMemOnly, NoCapture>, ImmArg>]>; class SVE_gather_prf_VS : Intrinsic<[], @@ -1293,7 +1293,7 @@ class SVE_gather_prf_VS llvm_i64_ty, // Scalar offset llvm_i32_ty // Prfop ], - [IntrInaccessibleMemOrArgMemOnly, ImmArg<3>]>; + [IntrInaccessibleMemOrArgMemOnly, ImmArg>]>; class SVE_MatMul_Intrinsic : Intrinsic<[llvm_anyvector_ty], @@ -1329,7 +1329,7 @@ def int_aarch64_sve_stnt1 : AdvSIMD_1Vec_PredStore_Intrinsic; def int_aarch64_sve_prf : Intrinsic<[], [llvm_anyvector_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrArgMemOnly, ImmArg<2>]>; + [IntrArgMemOnly, ImmArg>]>; // Scalar + 32-bit scaled offset vector, zero extend, packed and // unpacked. 
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 8c235159424bb6..14db97b04778cf 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -142,22 +142,22 @@ defm int_amdgcn_workgroup_id : AMDGPUReadPreloadRegisterIntrinsic_xyz_named def int_amdgcn_dispatch_ptr : Intrinsic<[LLVMQualPointerType], [], - [IntrNoMem, IntrSpeculatable]>; + [Align, IntrNoMem, IntrSpeculatable]>; def int_amdgcn_queue_ptr : GCCBuiltin<"__builtin_amdgcn_queue_ptr">, Intrinsic<[LLVMQualPointerType], [], - [IntrNoMem, IntrSpeculatable]>; + [Align, IntrNoMem, IntrSpeculatable]>; def int_amdgcn_kernarg_segment_ptr : GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">, Intrinsic<[LLVMQualPointerType], [], - [IntrNoMem, IntrSpeculatable]>; + [Align, IntrNoMem, IntrSpeculatable]>; def int_amdgcn_implicitarg_ptr : GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">, Intrinsic<[LLVMQualPointerType], [], - [IntrNoMem, IntrSpeculatable]>; + [Align, IntrNoMem, IntrSpeculatable]>; def int_amdgcn_groupstaticsize : GCCBuiltin<"__builtin_amdgcn_groupstaticsize">, @@ -170,14 +170,14 @@ def int_amdgcn_dispatch_id : def int_amdgcn_implicit_buffer_ptr : GCCBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">, Intrinsic<[LLVMQualPointerType], [], - [IntrNoMem, IntrSpeculatable]>; + [Align, IntrNoMem, IntrSpeculatable]>; // Set EXEC to the 64-bit value given. // This is always moved to the beginning of the basic block. // FIXME: Should be mangled for wave size. 
def int_amdgcn_init_exec : Intrinsic<[], [llvm_i64_ty], // 64-bit literal constant - [IntrConvergent, ImmArg<0>]>; + [IntrConvergent, ImmArg>]>; // Set EXEC according to a thread count packed in an SGPR input: // thread_count = (input >> bitoffset) & 0x7f; @@ -185,7 +185,7 @@ def int_amdgcn_init_exec : Intrinsic<[], def int_amdgcn_init_exec_from_input : Intrinsic<[], [llvm_i32_ty, // 32-bit SGPR input llvm_i32_ty], // bit offset of the thread count - [IntrConvergent, ImmArg<1>]>; + [IntrConvergent, ImmArg>]>; def int_amdgcn_wavefrontsize : GCCBuiltin<"__builtin_amdgcn_wavefrontsize">, @@ -200,10 +200,10 @@ def int_amdgcn_wavefrontsize : // the second one is copied to m0 def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">, Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], - [ImmArg<0>, IntrNoMem, IntrHasSideEffects]>; + [ImmArg>, IntrNoMem, IntrHasSideEffects]>; def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">, Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], - [ImmArg<0>, IntrNoMem, IntrHasSideEffects]>; + [ImmArg>, IntrNoMem, IntrHasSideEffects]>; def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">, Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent]>; @@ -212,7 +212,7 @@ def int_amdgcn_wave_barrier : GCCBuiltin<"__builtin_amdgcn_wave_barrier">, Intrinsic<[], [], [IntrConvergent]>; def int_amdgcn_s_waitcnt : GCCBuiltin<"__builtin_amdgcn_s_waitcnt">, - Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]>; + Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; def int_amdgcn_div_scale : Intrinsic< // 1st parameter: Numerator @@ -221,7 +221,7 @@ def int_amdgcn_div_scale : Intrinsic< // (0 = Denominator, 1 = Numerator). 
[llvm_anyfloat_ty, llvm_i1_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty], - [IntrNoMem, IntrSpeculatable, ImmArg<2>] + [IntrNoMem, IntrSpeculatable, ImmArg>] >; def int_amdgcn_div_fmas : Intrinsic<[llvm_anyfloat_ty], @@ -384,7 +384,7 @@ class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty], llvm_i32_ty, // ordering llvm_i32_ty, // scope llvm_i1_ty], // isVolatile - [IntrArgMemOnly, NoCapture<0>, ImmArg<2>, ImmArg<3>, ImmArg<4>], "", + [IntrArgMemOnly, NoCapture>, ImmArg>, ImmArg>, ImmArg>], "", [SDNPMemOperand] >; @@ -399,7 +399,7 @@ class AMDGPULDSF32Intrin : llvm_i32_ty, // ordering llvm_i32_ty, // scope llvm_i1_ty], // isVolatile - [IntrArgMemOnly, NoCapture<0>, ImmArg<2>, ImmArg<3>, ImmArg<4>] + [IntrArgMemOnly, NoCapture>, ImmArg>, ImmArg>, ImmArg>] >; // FIXME: The m0 argument should be moved after the normal arguments @@ -416,9 +416,9 @@ class AMDGPUDSOrderedIntrinsic : Intrinsic< // gfx10: bits 24-27 indicate the number of active threads/dwords llvm_i1_ty, // wave release, usually set to 1 llvm_i1_ty], // wave done, set to 1 for the last ordered instruction - [NoCapture<0>, - ImmArg<2>, ImmArg<3>, ImmArg<4>, - ImmArg<5>, ImmArg<6>, ImmArg<7> + [NoCapture>, + ImmArg>, ImmArg>, ImmArg>, + ImmArg>, ImmArg>, ImmArg> ] >; @@ -426,7 +426,7 @@ class AMDGPUDSAppendConsumedIntrinsic : Intrinsic< [llvm_i32_ty], [llvm_anyptr_ty, // LDS or GDS ptr llvm_i1_ty], // isVolatile - [IntrConvergent, IntrArgMemOnly, NoCapture<0>, ImmArg<1>], + [IntrConvergent, IntrArgMemOnly, NoCapture>, ImmArg>], "", [SDNPMemOperand] >; @@ -698,10 +698,10 @@ class AMDGPUImageDimIntrinsic.DmaskArgIndex>]), - !if(P_.IsSample, [ImmArg.UnormArgIndex>], []), - [ImmArg.TexFailCtrlArgIndex>, - ImmArg.CachePolicyArgIndex>]), + !if(P_.IsAtomic, [], [ImmArg.DmaskArgIndex>>]), + !if(P_.IsSample, [ImmArg.UnormArgIndex>>], []), + [ImmArg.TexFailCtrlArgIndex>>, + ImmArg.CachePolicyArgIndex>>]), "", sdnodeprops>, AMDGPURsrcIntrinsic { @@ -881,7 +881,7 @@ class AMDGPUBufferLoad : Intrinsic < 
llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - [IntrReadMem, ImmArg<3>, ImmArg<4>], "", [SDNPMemOperand]>, + [IntrReadMem, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_buffer_load_format : AMDGPUBufferLoad; def int_amdgcn_buffer_load : AMDGPUBufferLoad; @@ -891,7 +891,7 @@ def int_amdgcn_s_buffer_load : Intrinsic < [llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // byte offset(SGPR/imm) llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 2 = dlc) - [IntrNoMem, ImmArg<2>]>, + [IntrNoMem, ImmArg>]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_buffer_store_byte : Intrinsic < @@ -924,7 +924,7 @@ class AMDGPUBufferStore : Intrinsic < llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - [IntrWriteMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>, + [IntrWriteMem, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; def int_amdgcn_buffer_store_format : AMDGPUBufferStore; def int_amdgcn_buffer_store : AMDGPUBufferStore; @@ -945,7 +945,7 @@ class AMDGPURawBufferLoad : Intrinsic < // bit 1 = slc, // bit 2 = dlc on gfx10+), // swizzled buffer (bit 3 = swz)) - [IntrReadMem, ImmArg<3>], "", [SDNPMemOperand]>, + [IntrReadMem, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad; def int_amdgcn_raw_buffer_load : AMDGPURawBufferLoad; @@ -959,7 +959,7 @@ class AMDGPURawAtomicBufferLoad : Intrinsic < // bit 1 = slc, // bit 2 = dlc on gfx10+), // swizzled buffer (bit 3 = swz)) - [IntrArgMemOnly, ImmArg<3>], "", [SDNPMemOperand]>, + [IntrArgMemOnly, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_raw_atomic_buffer_load : AMDGPURawAtomicBufferLoad; @@ -973,7 +973,7 @@ class AMDGPUStructBufferLoad : Intrinsic < // bit 1 = slc, // bit 2 = dlc on gfx10+), // swizzled buffer (bit 3 = swz)) - [IntrReadMem, ImmArg<4>], "", [SDNPMemOperand]>, + [IntrReadMem, ImmArg>], "", [SDNPMemOperand]>, 
AMDGPURsrcIntrinsic<0>; def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad; def int_amdgcn_struct_buffer_load : AMDGPUStructBufferLoad; @@ -988,7 +988,7 @@ class AMDGPURawBufferStore : Intrinsic < // bit 1 = slc, // bit 2 = dlc on gfx10+), // swizzled buffer (bit 3 = swz)) - [IntrWriteMem, ImmArg<4>], "", [SDNPMemOperand]>, + [IntrWriteMem, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore; def int_amdgcn_raw_buffer_store : AMDGPURawBufferStore; @@ -1004,7 +1004,7 @@ class AMDGPUStructBufferStore : Intrinsic < // bit 1 = slc, // bit 2 = dlc on gfx10+), // swizzled buffer (bit 3 = swz)) - [IntrWriteMem, ImmArg<5>], "", [SDNPMemOperand]>, + [IntrWriteMem, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore; def int_amdgcn_struct_buffer_store : AMDGPUStructBufferStore; @@ -1016,7 +1016,7 @@ class AMDGPURawBufferAtomic : Intrinsic < llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty], // cachepolicy(imm; bit 1 = slc) - [ImmArg<4>], "", [SDNPMemOperand]>, + [ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1, 0>; def int_amdgcn_raw_buffer_atomic_swap : AMDGPURawBufferAtomic; def int_amdgcn_raw_buffer_atomic_add : AMDGPURawBufferAtomic; @@ -1038,7 +1038,7 @@ def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic< llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty], // cachepolicy(imm; bit 1 = slc) - [ImmArg<5>], "", [SDNPMemOperand]>, + [ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<2, 0>; class AMDGPUStructBufferAtomic : Intrinsic < @@ -1049,7 +1049,7 @@ class AMDGPUStructBufferAtomic : Intrinsic < llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) 
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty], // cachepolicy(imm; bit 1 = slc) - [ImmArg<5>], "", [SDNPMemOperand]>, + [ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1, 0>; def int_amdgcn_struct_buffer_atomic_swap : AMDGPUStructBufferAtomic; def int_amdgcn_struct_buffer_atomic_add : AMDGPUStructBufferAtomic; @@ -1072,7 +1072,7 @@ def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic< llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty], // cachepolicy(imm; bit 1 = slc) - [ImmArg<6>], "", [SDNPMemOperand]>, + [ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<2, 0>; // Obsolescent tbuffer intrinsics. @@ -1087,8 +1087,8 @@ def int_amdgcn_tbuffer_load : Intrinsic < llvm_i32_ty, // nfmt(imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - [IntrReadMem, ImmArg<4>, ImmArg<5>, ImmArg<6>, - ImmArg<7>, ImmArg<8>], "", [SDNPMemOperand]>, + [IntrReadMem, ImmArg>, ImmArg>, ImmArg>, + ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_tbuffer_store : Intrinsic < @@ -1103,8 +1103,8 @@ def int_amdgcn_tbuffer_store : Intrinsic < llvm_i32_ty, // nfmt(imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - [IntrWriteMem, ImmArg<5>, ImmArg<6>, ImmArg<7>, - ImmArg<8>, ImmArg<9>], "", [SDNPMemOperand]>, + [IntrWriteMem, ImmArg>, ImmArg>, ImmArg>, + ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; // New tbuffer intrinsics, with: @@ -1121,7 +1121,7 @@ def int_amdgcn_raw_tbuffer_load : Intrinsic < // bit 1 = slc, // bit 2 = dlc on gfx10+), // swizzled buffer (bit 3 = swz)) - [IntrReadMem, ImmArg<3>, ImmArg<4>], "", [SDNPMemOperand]>, + [IntrReadMem, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_raw_tbuffer_store : Intrinsic < @@ -1135,7 +1135,7 @@ def int_amdgcn_raw_tbuffer_store : Intrinsic < // bit 1 = slc, // bit 2 = dlc on 
gfx10+), // swizzled buffer (bit 3 = swz)) - [IntrWriteMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>, + [IntrWriteMem, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; def int_amdgcn_struct_tbuffer_load : Intrinsic < @@ -1149,7 +1149,7 @@ def int_amdgcn_struct_tbuffer_load : Intrinsic < // bit 1 = slc, // bit 2 = dlc on gfx10+), // swizzled buffer (bit 3 = swz)) - [IntrReadMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>, + [IntrReadMem, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_struct_tbuffer_store : Intrinsic < @@ -1164,7 +1164,7 @@ def int_amdgcn_struct_tbuffer_store : Intrinsic < // bit 1 = slc, // bit 2 = dlc on gfx10+), // swizzled buffer (bit 3 = swz)) - [IntrWriteMem, ImmArg<5>, ImmArg<6>], "", [SDNPMemOperand]>, + [IntrWriteMem, ImmArg>, ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; class AMDGPUBufferAtomic : Intrinsic < @@ -1174,7 +1174,7 @@ class AMDGPUBufferAtomic : Intrinsic < llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty], // slc(imm) - [ImmArg<4>], "", [SDNPMemOperand]>, + [ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1, 0>; def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic; def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic; @@ -1194,7 +1194,7 @@ def int_amdgcn_buffer_atomic_cmpswap : Intrinsic< llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty], // slc(imm) - [ImmArg<5>], "", [SDNPMemOperand]>, + [ImmArg>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<2, 0>; } // defset AMDGPUBufferIntrinsics @@ -1211,7 +1211,7 @@ def int_amdgcn_exp : Intrinsic <[], [ llvm_i1_ty, // done llvm_i1_ty // vm ], - [ImmArg<0>, ImmArg<1>, ImmArg<6>, ImmArg<7>, IntrWriteMem, IntrInaccessibleMemOnly] + [ImmArg>, ImmArg>, ImmArg>, ImmArg>, IntrWriteMem, IntrInaccessibleMemOnly] >; // exp with compr bit set. 
@@ -1222,7 +1222,7 @@ def int_amdgcn_exp_compr : Intrinsic <[], [ LLVMMatchType<0>, // src1 llvm_i1_ty, // done llvm_i1_ty], // vm - [ImmArg<0>, ImmArg<1>, ImmArg<4>, ImmArg<5>, IntrWriteMem, IntrInaccessibleMemOnly] + [ImmArg>, ImmArg>, ImmArg>, ImmArg>, IntrWriteMem, IntrInaccessibleMemOnly] >; def int_amdgcn_buffer_wbinvl1_sc : @@ -1243,23 +1243,23 @@ def int_amdgcn_s_memtime : def int_amdgcn_s_sleep : GCCBuiltin<"__builtin_amdgcn_s_sleep">, - Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]> { + Intrinsic<[], [llvm_i32_ty], [ImmArg>]> { } def int_amdgcn_s_incperflevel : GCCBuiltin<"__builtin_amdgcn_s_incperflevel">, - Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]> { + Intrinsic<[], [llvm_i32_ty], [ImmArg>]> { } def int_amdgcn_s_decperflevel : GCCBuiltin<"__builtin_amdgcn_s_decperflevel">, - Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]> { + Intrinsic<[], [llvm_i32_ty], [ImmArg>]> { } def int_amdgcn_s_getreg : GCCBuiltin<"__builtin_amdgcn_s_getreg">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty], - [IntrInaccessibleMemOnly, IntrReadMem, IntrSpeculatable, ImmArg<0>] + [IntrInaccessibleMemOnly, IntrReadMem, IntrSpeculatable, ImmArg>] >; // int_amdgcn_s_getpc is provided to allow a specific style of position @@ -1278,7 +1278,7 @@ def int_amdgcn_interp_mov : GCCBuiltin<"__builtin_amdgcn_interp_mov">, Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, ImmArg<0>, ImmArg<1>, ImmArg<2>]>; + [IntrNoMem, IntrSpeculatable, ImmArg>, ImmArg>, ImmArg>]>; // __builtin_amdgcn_interp_p1 , , , // This intrinsic reads from lds, but the memory values are constant, @@ -1287,14 +1287,14 @@ def int_amdgcn_interp_p1 : GCCBuiltin<"__builtin_amdgcn_interp_p1">, Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, ImmArg<1>, ImmArg<2>]>; + [IntrNoMem, IntrSpeculatable, ImmArg>, ImmArg>]>; // __builtin_amdgcn_interp_p2 , , , , def int_amdgcn_interp_p2 : 
GCCBuiltin<"__builtin_amdgcn_interp_p2">, Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, ImmArg<2>, ImmArg<3>]>; + [IntrNoMem, IntrSpeculatable, ImmArg>, ImmArg>]>; // See int_amdgcn_v_interp_p1 for why this is IntrNoMem. // __builtin_amdgcn_interp_p1_f16 , , , , @@ -1302,14 +1302,14 @@ def int_amdgcn_interp_p1_f16 : GCCBuiltin<"__builtin_amdgcn_interp_p1_f16">, Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, ImmArg<1>, ImmArg<2>, ImmArg<3>]>; + [IntrNoMem, IntrSpeculatable, ImmArg>, ImmArg>, ImmArg>]>; // __builtin_amdgcn_interp_p2_f16 , , , , , def int_amdgcn_interp_p2_f16 : GCCBuiltin<"__builtin_amdgcn_interp_p2_f16">, Intrinsic<[llvm_half_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, ImmArg<2>, ImmArg<3>, ImmArg<4>]>; + [IntrNoMem, IntrSpeculatable, ImmArg>, ImmArg>, ImmArg>]>; // Pixel shaders only: whether the current pixel is live (i.e. not a helper // invocation for derivative computation). 
@@ -1333,7 +1333,7 @@ def int_amdgcn_mbcnt_hi : def int_amdgcn_ds_swizzle : GCCBuiltin<"__builtin_amdgcn_ds_swizzle">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrConvergent, ImmArg<1>]>; + [IntrNoMem, IntrConvergent, ImmArg>]>; def int_amdgcn_ubfe : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], @@ -1401,11 +1401,11 @@ def int_amdgcn_cvt_pk_u8_f32 : def int_amdgcn_icmp : Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty, LLVMMatchType<1>, llvm_i32_ty], - [IntrNoMem, IntrConvergent, ImmArg<2>]>; + [IntrNoMem, IntrConvergent, ImmArg>]>; def int_amdgcn_fcmp : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, LLVMMatchType<1>, llvm_i32_ty], - [IntrNoMem, IntrConvergent, ImmArg<2>]>; + [IntrNoMem, IntrConvergent, ImmArg>]>; def int_amdgcn_ballot : Intrinsic<[llvm_anyint_ty], [llvm_i1_ty], [IntrNoMem, IntrConvergent]>; @@ -1561,13 +1561,13 @@ def int_amdgcn_set_inactive : // Return if the given flat pointer points to a local memory address. def int_amdgcn_is_shared : GCCBuiltin<"__builtin_amdgcn_is_shared">, Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], - [IntrNoMem, IntrSpeculatable, NoCapture<0>] + [IntrNoMem, IntrSpeculatable, NoCapture>] >; // Return if the given flat pointer points to a prvate memory address. 
def int_amdgcn_is_private : GCCBuiltin<"__builtin_amdgcn_is_private">, Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], - [IntrNoMem, IntrSpeculatable, NoCapture<0>] + [IntrNoMem, IntrSpeculatable, NoCapture>] >; // Waterfall intrinics used to tag a region as requiring waterfall loops to @@ -1638,8 +1638,8 @@ def int_amdgcn_buffer_wbinvl1_vol : def int_amdgcn_mov_dpp : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i1_ty], [IntrNoMem, IntrConvergent, ImmArg<1>, - ImmArg<2>, ImmArg<3>, ImmArg<4>]>; + llvm_i1_ty], [IntrNoMem, IntrConvergent, ImmArg>, + ImmArg>, ImmArg>, ImmArg>]>; // llvm.amdgcn.update.dpp.i32 // Should be equivalent to: @@ -1650,7 +1650,7 @@ def int_amdgcn_update_dpp : [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty], [IntrNoMem, IntrConvergent, - ImmArg<2>, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + ImmArg>, ImmArg>, ImmArg>, ImmArg>]>; def int_amdgcn_s_dcache_wb : GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">, @@ -1682,13 +1682,13 @@ def int_amdgcn_ds_bpermute : def int_amdgcn_permlane16 : GCCBuiltin<"__builtin_amdgcn_permlane16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty], - [IntrNoMem, IntrConvergent, ImmArg<4>, ImmArg<5>]>; + [IntrNoMem, IntrConvergent, ImmArg>, ImmArg>]>; // llvm.amdgcn.permlanex16 def int_amdgcn_permlanex16 : GCCBuiltin<"__builtin_amdgcn_permlanex16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty], - [IntrNoMem, IntrConvergent, ImmArg<4>, ImmArg<5>]>; + [IntrNoMem, IntrConvergent, ImmArg>, ImmArg>]>; // llvm.amdgcn.mov.dpp8.i32 // is a 32-bit constant whose high 8 bits must be zero which selects @@ -1696,7 +1696,7 @@ def int_amdgcn_permlanex16 : GCCBuiltin<"__builtin_amdgcn_permlanex16">, def int_amdgcn_mov_dpp8 : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, IntrConvergent, ImmArg<1>]>; + [IntrNoMem, IntrConvergent, 
ImmArg>]>; def int_amdgcn_s_get_waveid_in_workgroup : GCCBuiltin<"__builtin_amdgcn_s_get_waveid_in_workgroup">, @@ -1718,7 +1718,7 @@ def int_amdgcn_fdot2 : llvm_float_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, ImmArg<3>] + [IntrNoMem, IntrSpeculatable, ImmArg>] >; // i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c, i1 %clamp) @@ -1733,7 +1733,7 @@ def int_amdgcn_sdot2 : llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, ImmArg<3>] + [IntrNoMem, IntrSpeculatable, ImmArg>] >; // u32 %r = llvm.amdgcn.udot2(v2u16 %a, v2u16 %b, u32 %c, i1 %clamp) @@ -1748,7 +1748,7 @@ def int_amdgcn_udot2 : llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, ImmArg<3>] + [IntrNoMem, IntrSpeculatable, ImmArg>] >; // i32 %r = llvm.amdgcn.sdot4(v4i8 (as i32) %a, v4i8 (as i32) %b, i32 %c, i1 %clamp) @@ -1763,7 +1763,7 @@ def int_amdgcn_sdot4 : llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, ImmArg<3>] + [IntrNoMem, IntrSpeculatable, ImmArg>] >; // u32 %r = llvm.amdgcn.udot4(v4u8 (as u32) %a, v4u8 (as u32) %b, u32 %c, i1 %clamp) @@ -1778,7 +1778,7 @@ def int_amdgcn_udot4 : llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, ImmArg<3>] + [IntrNoMem, IntrSpeculatable, ImmArg>] >; // i32 %r = llvm.amdgcn.sdot8(v8i4 (as i32) %a, v8i4 (as i32) %b, i32 %c, i1 %clamp) @@ -1794,7 +1794,7 @@ def int_amdgcn_sdot8 : llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, ImmArg<3>] + [IntrNoMem, IntrSpeculatable, ImmArg>] >; // u32 %r = llvm.amdgcn.udot8(v8u4 (as u32) %a, v8u4 (as u32) %b, u32 %c, i1 %clamp) @@ -1810,7 +1810,7 @@ def int_amdgcn_udot8 : llvm_i32_ty, // %c llvm_i1_ty // %clamp ], - [IntrNoMem, IntrSpeculatable, ImmArg<3>] + [IntrNoMem, IntrSpeculatable, ImmArg>] >; //===----------------------------------------------------------------------===// @@ -1831,7 +1831,7 @@ class AMDGPUGlobalAtomicNoRtn : Intrinsic < [], [llvm_anyptr_ty, // vaddr 
llvm_anyfloat_ty], // vdata(VGPR) - [IntrArgMemOnly, NoCapture<0>], "", [SDNPMemOperand]>; + [IntrArgMemOnly, NoCapture>], "", [SDNPMemOperand]>; def int_amdgcn_buffer_atomic_fadd : AMDGPUBufferAtomicNoRtn; def int_amdgcn_global_atomic_fadd : AMDGPUGlobalAtomicNoRtn; @@ -1841,121 +1841,121 @@ def int_amdgcn_mfma_f32_32x32x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32 Intrinsic<[llvm_v32f32_ty], [llvm_float_ty, llvm_float_ty, llvm_v32f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + [IntrConvergent, IntrNoMem, ImmArg>, ImmArg>, ImmArg>]>; def int_amdgcn_mfma_f32_16x16x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x1f32">, Intrinsic<[llvm_v16f32_ty], [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + [IntrConvergent, IntrNoMem, ImmArg>, ImmArg>, ImmArg>]>; def int_amdgcn_mfma_f32_4x4x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x1f32">, Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + [IntrConvergent, IntrNoMem, ImmArg>, ImmArg>, ImmArg>]>; def int_amdgcn_mfma_f32_32x32x2f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x2f32">, Intrinsic<[llvm_v16f32_ty], [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + [IntrConvergent, IntrNoMem, ImmArg>, ImmArg>, ImmArg>]>; def int_amdgcn_mfma_f32_16x16x4f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x4f32">, Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + [IntrConvergent, IntrNoMem, ImmArg>, ImmArg>, ImmArg>]>; def int_amdgcn_mfma_f32_32x32x4f16 : 
GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x4f16">, Intrinsic<[llvm_v32f32_ty], [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v32f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + [IntrConvergent, IntrNoMem, ImmArg>, ImmArg>, ImmArg>]>; def int_amdgcn_mfma_f32_16x16x4f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x4f16">, Intrinsic<[llvm_v16f32_ty], [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + [IntrConvergent, IntrNoMem, ImmArg>, ImmArg>, ImmArg>]>; def int_amdgcn_mfma_f32_4x4x4f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x4f16">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + [IntrConvergent, IntrNoMem, ImmArg>, ImmArg>, ImmArg>]>; def int_amdgcn_mfma_f32_32x32x8f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x8f16">, Intrinsic<[llvm_v16f32_ty], [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + [IntrConvergent, IntrNoMem, ImmArg>, ImmArg>, ImmArg>]>; def int_amdgcn_mfma_f32_16x16x16f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x16f16">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + [IntrConvergent, IntrNoMem, ImmArg>, ImmArg>, ImmArg>]>; def int_amdgcn_mfma_i32_32x32x4i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_32x32x4i8">, Intrinsic<[llvm_v32i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_v32i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + [IntrConvergent, IntrNoMem, ImmArg>, ImmArg>, ImmArg>]>; def int_amdgcn_mfma_i32_16x16x4i8 : 
GCCBuiltin<"__builtin_amdgcn_mfma_i32_16x16x4i8">, Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + [IntrConvergent, IntrNoMem, ImmArg>, ImmArg>, ImmArg>]>; def int_amdgcn_mfma_i32_4x4x4i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_4x4x4i8">, Intrinsic<[llvm_v4i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + [IntrConvergent, IntrNoMem, ImmArg>, ImmArg>, ImmArg>]>; def int_amdgcn_mfma_i32_32x32x8i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_32x32x8i8">, Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + [IntrConvergent, IntrNoMem, ImmArg>, ImmArg>, ImmArg>]>; def int_amdgcn_mfma_i32_16x16x16i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_16x16x16i8">, Intrinsic<[llvm_v4i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + [IntrConvergent, IntrNoMem, ImmArg>, ImmArg>, ImmArg>]>; def int_amdgcn_mfma_f32_32x32x2bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x2bf16">, Intrinsic<[llvm_v32f32_ty], [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v32f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + [IntrConvergent, IntrNoMem, ImmArg>, ImmArg>, ImmArg>]>; def int_amdgcn_mfma_f32_16x16x2bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x2bf16">, Intrinsic<[llvm_v16f32_ty], [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + [IntrConvergent, IntrNoMem, ImmArg>, ImmArg>, ImmArg>]>; def int_amdgcn_mfma_f32_4x4x2bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x2bf16">, 
Intrinsic<[llvm_v4f32_ty], [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + [IntrConvergent, IntrNoMem, ImmArg>, ImmArg>, ImmArg>]>; def int_amdgcn_mfma_f32_32x32x4bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x4bf16">, Intrinsic<[llvm_v16f32_ty], [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + [IntrConvergent, IntrNoMem, ImmArg>, ImmArg>, ImmArg>]>; def int_amdgcn_mfma_f32_16x16x8bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x8bf16">, Intrinsic<[llvm_v4f32_ty], [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + [IntrConvergent, IntrNoMem, ImmArg>, ImmArg>, ImmArg>]>; //===----------------------------------------------------------------------===// // Special Intrinsics for backend internal use only. No frontend diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index 1606d666fa6a93..adeafbb267b2b8 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -19,7 +19,7 @@ let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.". // A space-consuming intrinsic primarily for testing ARMConstantIslands. The // first argument is the number of bytes this "instruction" takes up, the second // and return value are essentially chains, used to force ordering during ISel. 
-def int_arm_space : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>]>; +def int_arm_space : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [ImmArg>]>; // 16-bit multiplications def int_arm_smulbb : GCCBuiltin<"__builtin_arm_smulbb">, @@ -262,59 +262,59 @@ def int_arm_vcvtru : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty], // Coprocessor def int_arm_ldc : GCCBuiltin<"__builtin_arm_ldc">, - Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>; + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg>, ImmArg>]>; def int_arm_ldcl : GCCBuiltin<"__builtin_arm_ldcl">, - Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>; + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg>, ImmArg>]>; def int_arm_ldc2 : GCCBuiltin<"__builtin_arm_ldc2">, - Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>; + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg>, ImmArg>]>; def int_arm_ldc2l : GCCBuiltin<"__builtin_arm_ldc2l">, - Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>; + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg>, ImmArg>]>; def int_arm_stc : GCCBuiltin<"__builtin_arm_stc">, - Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>; + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg>, ImmArg>]>; def int_arm_stcl : GCCBuiltin<"__builtin_arm_stcl">, - Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>; + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg>, ImmArg>]>; def int_arm_stc2 : GCCBuiltin<"__builtin_arm_stc2">, - Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>; + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg>, ImmArg>]>; def int_arm_stc2l : GCCBuiltin<"__builtin_arm_stc2l">, - Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>; + 
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg>, ImmArg>]>; // Move to coprocessor def int_arm_mcr : GCCBuiltin<"__builtin_arm_mcr">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>, ImmArg>, ImmArg>, ImmArg>]>; def int_arm_mcr2 : GCCBuiltin<"__builtin_arm_mcr2">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>, ImmArg>, ImmArg>, ImmArg>]>; // Move from coprocessor def int_arm_mrc : GCCBuiltin<"__builtin_arm_mrc">, MSBuiltin<"_MoveFromCoprocessor">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<2>, ImmArg<3>, ImmArg<4>]>; + llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>, ImmArg>, ImmArg>, ImmArg>]>; def int_arm_mrc2 : GCCBuiltin<"__builtin_arm_mrc2">, MSBuiltin<"_MoveFromCoprocessor2">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<2>, ImmArg<3>, ImmArg<4>]>; + llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>, ImmArg>, ImmArg>, ImmArg>]>; // Coprocessor data processing def int_arm_cdp : GCCBuiltin<"__builtin_arm_cdp">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<2>, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>, ImmArg>, ImmArg>, ImmArg>, ImmArg>]>; def int_arm_cdp2 : GCCBuiltin<"__builtin_arm_cdp2">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<2>, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>, ImmArg>, ImmArg>, 
ImmArg>, ImmArg>]>; // Move from two registers to coprocessor def int_arm_mcrr : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<4>]>; + llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>, ImmArg>]>; def int_arm_mcrr2 : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<4>]>; + llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>, ImmArg>]>; def int_arm_mrrc : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<2>]>; + llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>, ImmArg>]>; def int_arm_mrrc2 : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<2>]>; + llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>, ImmArg>]>; //===----------------------------------------------------------------------===// // CRC32 @@ -695,16 +695,16 @@ def int_arm_neon_vst4 : Intrinsic<[], def int_arm_neon_vst1x2 : Intrinsic<[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>], - [IntrArgMemOnly, NoCapture<0>]>; + [IntrArgMemOnly, NoCapture>]>; def int_arm_neon_vst1x3 : Intrinsic<[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>], - [IntrArgMemOnly, NoCapture<0>]>; + [IntrArgMemOnly, NoCapture>]>; def int_arm_neon_vst1x4 : Intrinsic<[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, LLVMMatchType<1>], - [IntrArgMemOnly, NoCapture<0>]>; + [IntrArgMemOnly, NoCapture>]>; // Vector store N-element structure from one lane. 
// Source operands are: the address, the N vectors, the lane number, and @@ -1297,22 +1297,22 @@ multiclass CDEGPRIntrinsics args> { def "" : Intrinsic< [llvm_i32_ty], !listconcat([llvm_i32_ty /* coproc */], args, [llvm_i32_ty /* imm */]), - [IntrNoMem, ImmArg<0>, ImmArg]>; + [IntrNoMem, ImmArg>, ImmArg>]>; def a : Intrinsic< [llvm_i32_ty], !listconcat([llvm_i32_ty /* coproc */, llvm_i32_ty /* acc */], args, [llvm_i32_ty /* imm */]), - [IntrNoMem, ImmArg<0>, ImmArg]>; + [IntrNoMem, ImmArg>, ImmArg>]>; def d: Intrinsic< [llvm_i32_ty /* lo */, llvm_i32_ty /* hi */], !listconcat([llvm_i32_ty /* coproc */], args, [llvm_i32_ty /* imm */]), - [IntrNoMem, ImmArg<0>, ImmArg]>; + [IntrNoMem, ImmArg>, ImmArg>]>; def da: Intrinsic< [llvm_i32_ty /* lo */, llvm_i32_ty /* hi */], !listconcat([llvm_i32_ty /* coproc */, llvm_i32_ty /* acc_lo */, llvm_i32_ty /* acc_hi */], args, [llvm_i32_ty /* imm */]), - [IntrNoMem, ImmArg<0>, ImmArg]>; + [IntrNoMem, ImmArg>, ImmArg>]>; } defm int_arm_cde_cx1: CDEGPRIntrinsics<[]>; @@ -1323,12 +1323,12 @@ multiclass CDEVCXIntrinsics args> { def "" : Intrinsic< [llvm_anyfloat_ty], !listconcat([llvm_i32_ty /* coproc */], args, [llvm_i32_ty /* imm */]), - [IntrNoMem, ImmArg<0>, ImmArg]>; + [IntrNoMem, ImmArg>, ImmArg>]>; def a : Intrinsic< [llvm_anyfloat_ty], !listconcat([llvm_i32_ty /* coproc */, LLVMMatchType<0> /* acc */], args, [llvm_i32_ty /* imm */]), - [IntrNoMem, ImmArg<0>, ImmArg]>; + [IntrNoMem, ImmArg>, ImmArg>]>; } defm int_arm_cde_vcx1 : CDEVCXIntrinsics<[]>; @@ -1339,23 +1339,23 @@ multiclass CDEVCXVecIntrinsics args> { def "" : Intrinsic< [llvm_v16i8_ty], !listconcat([llvm_i32_ty /* coproc */], args, [llvm_i32_ty /* imm */]), - [IntrNoMem, ImmArg<0>, ImmArg]>; + [IntrNoMem, ImmArg>, ImmArg>]>; def a : Intrinsic< [llvm_v16i8_ty], !listconcat([llvm_i32_ty /* coproc */, llvm_v16i8_ty /* acc */], args, [llvm_i32_ty /* imm */]), - [IntrNoMem, ImmArg<0>, ImmArg]>; + [IntrNoMem, ImmArg>, ImmArg>]>; def _predicated : Intrinsic< 
[llvm_anyvector_ty], !listconcat([llvm_i32_ty /* coproc */, LLVMMatchType<0> /* inactive */], args, [llvm_i32_ty /* imm */, llvm_anyvector_ty /* mask */]), - [IntrNoMem, ImmArg<0>, ImmArg]>; + [IntrNoMem, ImmArg>, ImmArg>]>; def a_predicated : Intrinsic< [llvm_anyvector_ty], !listconcat([llvm_i32_ty /* coproc */, LLVMMatchType<0> /* acc */], args, [llvm_i32_ty /* imm */, llvm_anyvector_ty /* mask */]), - [IntrNoMem, ImmArg<0>, ImmArg]>; + [IntrNoMem, ImmArg>, ImmArg>]>; } defm int_arm_cde_vcx1q : CDEVCXVecIntrinsics<[]>; diff --git a/llvm/include/llvm/IR/IntrinsicsBPF.td b/llvm/include/llvm/IR/IntrinsicsBPF.td index a43ca0e2569bc6..c4d35b2a0a88c5 100644 --- a/llvm/include/llvm/IR/IntrinsicsBPF.td +++ b/llvm/include/llvm/IR/IntrinsicsBPF.td @@ -22,7 +22,7 @@ let TargetPrefix = "bpf" in { // All intrinsics start with "llvm.bpf." Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty]>; def int_bpf_preserve_field_info : GCCBuiltin<"__builtin_bpf_preserve_field_info">, Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty, llvm_i64_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_bpf_btf_type_id : GCCBuiltin<"__builtin_bpf_btf_type_id">, Intrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_any_ty, llvm_i64_ty], [IntrNoMem]>; diff --git a/llvm/include/llvm/IR/IntrinsicsHexagon.td b/llvm/include/llvm/IR/IntrinsicsHexagon.td index 163396365e7505..fe16a361ba3d65 100644 --- a/llvm/include/llvm/IR/IntrinsicsHexagon.td +++ b/llvm/include/llvm/IR/IntrinsicsHexagon.td @@ -51,19 +51,19 @@ class Hexagon_mem_memmemsisi_Intrinsic : Hexagon_Intrinsic]>; + [IntrArgMemOnly, ImmArg>]>; class Hexagon_mem_memsisisi_Intrinsic : Hexagon_Intrinsic]>; + [IntrWriteMem, ImmArg>]>; class Hexagon_mem_memdisisi_Intrinsic : Hexagon_Intrinsic]>; + [IntrWriteMem, ImmArg>]>; // // BUILTIN_INFO_NONCONST(circ_ldd,PTR_ftype_PTRPTRSISI,4) @@ -131,34 +131,34 @@ def llvm_ptr64_ty : LLVMPointerType; // Mark locked loads as read/write to prevent any accidental reordering. 
def int_hexagon_L2_loadw_locked : Hexagon_Intrinsic<"HEXAGON_L2_loadw_locked", [llvm_i32_ty], [llvm_ptr32_ty], - [IntrArgMemOnly, NoCapture<0>]>; + [IntrArgMemOnly, NoCapture>]>; def int_hexagon_L4_loadd_locked : Hexagon_Intrinsic<"HEXAGON_L4_loadd_locked", [llvm_i64_ty], [llvm_ptr64_ty], - [IntrArgMemOnly, NoCapture<0>]>; + [IntrArgMemOnly, NoCapture>]>; def int_hexagon_S2_storew_locked : Hexagon_Intrinsic<"HEXAGON_S2_storew_locked", [llvm_i32_ty], - [llvm_ptr32_ty, llvm_i32_ty], [IntrArgMemOnly, NoCapture<0>]>; + [llvm_ptr32_ty, llvm_i32_ty], [IntrArgMemOnly, NoCapture>]>; def int_hexagon_S4_stored_locked : Hexagon_Intrinsic<"HEXAGON_S4_stored_locked", [llvm_i32_ty], - [llvm_ptr64_ty, llvm_i64_ty], [IntrArgMemOnly, NoCapture<0>]>; + [llvm_ptr64_ty, llvm_i64_ty], [IntrArgMemOnly, NoCapture>]>; def int_hexagon_vmemcpy : Hexagon_Intrinsic<"hexagon_vmemcpy", [], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>, ReadOnly<1>]>; + [IntrArgMemOnly, NoCapture>, NoCapture>, WriteOnly>, ReadOnly>]>; def int_hexagon_vmemset : Hexagon_Intrinsic<"hexagon_vmemset", [], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], - [IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>; + [IntrArgMemOnly, NoCapture>, WriteOnly>]>; multiclass Hexagon_custom_circ_ld_Intrinsic { def NAME#_pci : Hexagon_NonGCC_Intrinsic< [ElTy, llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], - [IntrArgMemOnly, NoCapture<3>]>; + [IntrArgMemOnly, NoCapture>]>; def NAME#_pcr : Hexagon_NonGCC_Intrinsic< [ElTy, llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_ptr_ty], - [IntrArgMemOnly, NoCapture<2>]>; + [IntrArgMemOnly, NoCapture>]>; } defm int_hexagon_L2_loadrub : Hexagon_custom_circ_ld_Intrinsic; @@ -172,10 +172,10 @@ multiclass Hexagon_custom_circ_st_Intrinsic { def NAME#_pci : Hexagon_NonGCC_Intrinsic< [llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, ElTy, llvm_ptr_ty], - [IntrArgMemOnly, NoCapture<4>]>; + [IntrArgMemOnly, NoCapture>]>; def NAME#_pcr : 
Hexagon_NonGCC_Intrinsic< [llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty, ElTy, llvm_ptr_ty], - [IntrArgMemOnly, NoCapture<3>]>; + [IntrArgMemOnly, NoCapture>]>; } defm int_hexagon_S2_storerb : Hexagon_custom_circ_st_Intrinsic; diff --git a/llvm/include/llvm/IR/IntrinsicsHexagonDep.td b/llvm/include/llvm/IR/IntrinsicsHexagonDep.td index 67a06f5c06f4e6..198b6a7ab0d1ef 100644 --- a/llvm/include/llvm/IR/IntrinsicsHexagonDep.td +++ b/llvm/include/llvm/IR/IntrinsicsHexagonDep.td @@ -1100,10 +1100,10 @@ def int_hexagon_C2_cmpgtup : Hexagon_i32_i64i64_Intrinsic<"HEXAGON_C2_cmpgtup">; def int_hexagon_A4_rcmpeqi : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_rcmpeqi", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_rcmpeqi", [IntrNoMem, ImmArg>]>; def int_hexagon_A4_rcmpneqi : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_rcmpneqi", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_rcmpneqi", [IntrNoMem, ImmArg>]>; def int_hexagon_A4_rcmpeq : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_rcmpeq">; @@ -1124,19 +1124,19 @@ def int_hexagon_C4_nbitsclr : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_nbitsclr">; def int_hexagon_C2_cmpeqi : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpeqi", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpeqi", [IntrNoMem, ImmArg>]>; def int_hexagon_C2_cmpgti : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgti", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgti", [IntrNoMem, ImmArg>]>; def int_hexagon_C2_cmpgtui : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgtui", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgtui", [IntrNoMem, ImmArg>]>; def int_hexagon_C2_cmpgei : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgei", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgei", [IntrNoMem, ImmArg>]>; def int_hexagon_C2_cmpgeui : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgeui", [IntrNoMem, ImmArg<1>]>; 
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgeui", [IntrNoMem, ImmArg>]>; def int_hexagon_C2_cmplt : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmplt">; @@ -1145,19 +1145,19 @@ def int_hexagon_C2_cmpltu : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpltu">; def int_hexagon_C2_bitsclri : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_bitsclri", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_bitsclri", [IntrNoMem, ImmArg>]>; def int_hexagon_C4_nbitsclri : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_nbitsclri", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_nbitsclri", [IntrNoMem, ImmArg>]>; def int_hexagon_C4_cmpneqi : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmpneqi", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmpneqi", [IntrNoMem, ImmArg>]>; def int_hexagon_C4_cmpltei : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmpltei", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmpltei", [IntrNoMem, ImmArg>]>; def int_hexagon_C4_cmplteui : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmplteui", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmplteui", [IntrNoMem, ImmArg>]>; def int_hexagon_C4_cmpneq : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmpneq">; @@ -1226,13 +1226,13 @@ def int_hexagon_C2_mux : Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_mux">; def int_hexagon_C2_muxii : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_muxii", [IntrNoMem, ImmArg<1>, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_muxii", [IntrNoMem, ImmArg>, ImmArg>]>; def int_hexagon_C2_muxir : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_muxir", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_muxir", [IntrNoMem, ImmArg>]>; def int_hexagon_C2_muxri : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_muxri", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_muxri", [IntrNoMem, ImmArg>]>; def int_hexagon_C2_vmux : Hexagon_i64_i32i64i64_Intrinsic<"HEXAGON_C2_vmux">; @@ 
-1244,7 +1244,7 @@ def int_hexagon_A2_vcmpbeq : Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A2_vcmpbeq">; def int_hexagon_A4_vcmpbeqi : -Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpbeqi", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpbeqi", [IntrNoMem, ImmArg>]>; def int_hexagon_A4_vcmpbeq_any : Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A4_vcmpbeq_any">; @@ -1253,31 +1253,31 @@ def int_hexagon_A2_vcmpbgtu : Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A2_vcmpbgtu">; def int_hexagon_A4_vcmpbgtui : -Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpbgtui", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpbgtui", [IntrNoMem, ImmArg>]>; def int_hexagon_A4_vcmpbgt : Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A4_vcmpbgt">; def int_hexagon_A4_vcmpbgti : -Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpbgti", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpbgti", [IntrNoMem, ImmArg>]>; def int_hexagon_A4_cmpbeq : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbeq">; def int_hexagon_A4_cmpbeqi : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbeqi", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbeqi", [IntrNoMem, ImmArg>]>; def int_hexagon_A4_cmpbgtu : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbgtu">; def int_hexagon_A4_cmpbgtui : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbgtui", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbgtui", [IntrNoMem, ImmArg>]>; def int_hexagon_A4_cmpbgt : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbgt">; def int_hexagon_A4_cmpbgti : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbgti", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbgti", [IntrNoMem, ImmArg>]>; def int_hexagon_A2_vcmpheq : Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A2_vcmpheq">; @@ -1289,13 +1289,13 @@ def int_hexagon_A2_vcmphgtu : Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A2_vcmphgtu">; def int_hexagon_A4_vcmpheqi : -Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpheqi", [IntrNoMem, 
ImmArg<1>]>; +Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpheqi", [IntrNoMem, ImmArg>]>; def int_hexagon_A4_vcmphgti : -Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmphgti", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmphgti", [IntrNoMem, ImmArg>]>; def int_hexagon_A4_vcmphgtui : -Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmphgtui", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmphgtui", [IntrNoMem, ImmArg>]>; def int_hexagon_A4_cmpheq : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpheq">; @@ -1307,13 +1307,13 @@ def int_hexagon_A4_cmphgtu : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmphgtu">; def int_hexagon_A4_cmpheqi : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpheqi", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpheqi", [IntrNoMem, ImmArg>]>; def int_hexagon_A4_cmphgti : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmphgti", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmphgti", [IntrNoMem, ImmArg>]>; def int_hexagon_A4_cmphgtui : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmphgtui", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmphgtui", [IntrNoMem, ImmArg>]>; def int_hexagon_A2_vcmpweq : Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A2_vcmpweq">; @@ -1325,13 +1325,13 @@ def int_hexagon_A2_vcmpwgtu : Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A2_vcmpwgtu">; def int_hexagon_A4_vcmpweqi : -Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpweqi", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpweqi", [IntrNoMem, ImmArg>]>; def int_hexagon_A4_vcmpwgti : -Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpwgti", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpwgti", [IntrNoMem, ImmArg>]>; def int_hexagon_A4_vcmpwgtui : -Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpwgtui", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpwgtui", [IntrNoMem, ImmArg>]>; def int_hexagon_A4_boundscheck : 
Hexagon_i32_i32i64_Intrinsic<"HEXAGON_A4_boundscheck">; @@ -1784,13 +1784,13 @@ def int_hexagon_M2_mpyud_ll_s1 : Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyud_ll_s1">; def int_hexagon_M2_mpysmi : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpysmi", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpysmi", [IntrNoMem, ImmArg>]>; def int_hexagon_M2_macsip : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_macsip", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_macsip", [IntrNoMem, ImmArg>]>; def int_hexagon_M2_macsin : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_macsin", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_macsin", [IntrNoMem, ImmArg>]>; def int_hexagon_M2_dpmpyss_s0 : Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_dpmpyss_s0">; @@ -1847,13 +1847,13 @@ def int_hexagon_M2_acci : Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_acci">; def int_hexagon_M2_accii : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_accii", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_accii", [IntrNoMem, ImmArg>]>; def int_hexagon_M2_nacci : Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_nacci">; def int_hexagon_M2_naccii : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_naccii", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_naccii", [IntrNoMem, ImmArg>]>; def int_hexagon_M2_subacc : Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_subacc">; @@ -1862,16 +1862,16 @@ def int_hexagon_M4_mpyrr_addr : Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyrr_addr">; def int_hexagon_M4_mpyri_addr_u2 : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyri_addr_u2", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyri_addr_u2", [IntrNoMem, ImmArg>]>; def int_hexagon_M4_mpyri_addr : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyri_addr", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyri_addr", [IntrNoMem, ImmArg>]>; def int_hexagon_M4_mpyri_addi : 
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyri_addi", [IntrNoMem, ImmArg<0>, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyri_addi", [IntrNoMem, ImmArg>, ImmArg>]>; def int_hexagon_M4_mpyrr_addi : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyrr_addi", [IntrNoMem, ImmArg<0>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyrr_addi", [IntrNoMem, ImmArg>]>; def int_hexagon_M2_vmpy2s_s0 : Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_vmpy2s_s0">; @@ -2234,10 +2234,10 @@ def int_hexagon_S2_vcrotate : Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_vcrotate">; def int_hexagon_S4_vrcrotate_acc : -Hexagon_i64_i64i64i32i32_Intrinsic<"HEXAGON_S4_vrcrotate_acc", [IntrNoMem, ImmArg<3>]>; +Hexagon_i64_i64i64i32i32_Intrinsic<"HEXAGON_S4_vrcrotate_acc", [IntrNoMem, ImmArg>]>; def int_hexagon_S4_vrcrotate : -Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_S4_vrcrotate", [IntrNoMem, ImmArg<2>]>; +Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_S4_vrcrotate", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_vcnegh : Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_vcnegh">; @@ -2270,7 +2270,7 @@ def int_hexagon_A2_subsat : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subsat">; def int_hexagon_A2_addi : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addi", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addi", [IntrNoMem, ImmArg>]>; def int_hexagon_A2_addh_l16_ll : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_l16_ll">; @@ -2411,13 +2411,13 @@ def int_hexagon_A2_tfr : Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_tfr">; def int_hexagon_A2_tfrsi : -Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_tfrsi", [IntrNoMem, ImmArg<0>]>; +Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_tfrsi", [IntrNoMem, ImmArg>]>; def int_hexagon_A2_tfrp : Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_tfrp">; def int_hexagon_A2_tfrpi : -Hexagon_i64_i32_Intrinsic<"HEXAGON_A2_tfrpi", [IntrNoMem, ImmArg<0>]>; +Hexagon_i64_i32_Intrinsic<"HEXAGON_A2_tfrpi", [IntrNoMem, ImmArg>]>; def int_hexagon_A2_zxtb : Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_zxtb">; @@ 
-2435,13 +2435,13 @@ def int_hexagon_A2_combinew : Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A2_combinew">; def int_hexagon_A4_combineri : -Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_combineri", [IntrNoMem, ImmArg<1>]>; +Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_combineri", [IntrNoMem, ImmArg>]>; def int_hexagon_A4_combineir : -Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_combineir", [IntrNoMem, ImmArg<0>]>; +Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_combineir", [IntrNoMem, ImmArg>]>; def int_hexagon_A2_combineii : -Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A2_combineii", [IntrNoMem, ImmArg<0>, ImmArg<1>]>; +Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A2_combineii", [IntrNoMem, ImmArg>, ImmArg>]>; def int_hexagon_A2_combine_hh : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_combine_hh">; @@ -2456,10 +2456,10 @@ def int_hexagon_A2_combine_ll : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_combine_ll">; def int_hexagon_A2_tfril : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_tfril", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_tfril", [IntrNoMem, ImmArg>]>; def int_hexagon_A2_tfrih : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_tfrih", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_tfrih", [IntrNoMem, ImmArg>]>; def int_hexagon_A2_and : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_and">; @@ -2492,10 +2492,10 @@ def int_hexagon_A4_ornp : Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A4_ornp">; def int_hexagon_S4_addaddi : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_addaddi", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_addaddi", [IntrNoMem, ImmArg>]>; def int_hexagon_S4_subaddi : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_subaddi", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_subaddi", [IntrNoMem, ImmArg>]>; def int_hexagon_M4_and_and : Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_and_and">; @@ -2522,13 +2522,13 @@ def int_hexagon_M4_or_xor : Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_or_xor">; def int_hexagon_S4_or_andix : 
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_or_andix", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_or_andix", [IntrNoMem, ImmArg>]>; def int_hexagon_S4_or_andi : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_or_andi", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_or_andi", [IntrNoMem, ImmArg>]>; def int_hexagon_S4_or_ori : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_or_ori", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_or_ori", [IntrNoMem, ImmArg>]>; def int_hexagon_M4_xor_and : Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_xor_and">; @@ -2540,13 +2540,13 @@ def int_hexagon_M4_xor_andn : Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_xor_andn">; def int_hexagon_A2_subri : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subri", [IntrNoMem, ImmArg<0>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subri", [IntrNoMem, ImmArg>]>; def int_hexagon_A2_andir : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_andir", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_andir", [IntrNoMem, ImmArg>]>; def int_hexagon_A2_orir : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_orir", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_orir", [IntrNoMem, ImmArg>]>; def int_hexagon_A2_andp : Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_andp">; @@ -2768,19 +2768,19 @@ def int_hexagon_A2_vnavghr : Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vnavghr">; def int_hexagon_A4_round_ri : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_round_ri", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_round_ri", [IntrNoMem, ImmArg>]>; def int_hexagon_A4_round_rr : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_round_rr">; def int_hexagon_A4_round_ri_sat : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_round_ri_sat", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_round_ri_sat", [IntrNoMem, ImmArg>]>; def int_hexagon_A4_round_rr_sat : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_round_rr_sat">; def 
int_hexagon_A4_cround_ri : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cround_ri", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cround_ri", [IntrNoMem, ImmArg>]>; def int_hexagon_A4_cround_rr : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cround_rr">; @@ -2891,13 +2891,13 @@ def int_hexagon_F2_sfmin : Hexagon_float_floatfloat_Intrinsic<"HEXAGON_F2_sfmin", [IntrNoMem, Throws]>; def int_hexagon_F2_sfclass : -Hexagon_i32_floati32_Intrinsic<"HEXAGON_F2_sfclass", [IntrNoMem, Throws, ImmArg<1>]>; +Hexagon_i32_floati32_Intrinsic<"HEXAGON_F2_sfclass", [IntrNoMem, Throws, ImmArg>]>; def int_hexagon_F2_sfimm_p : -Hexagon_float_i32_Intrinsic<"HEXAGON_F2_sfimm_p", [IntrNoMem, Throws, ImmArg<0>]>; +Hexagon_float_i32_Intrinsic<"HEXAGON_F2_sfimm_p", [IntrNoMem, Throws, ImmArg>]>; def int_hexagon_F2_sfimm_n : -Hexagon_float_i32_Intrinsic<"HEXAGON_F2_sfimm_n", [IntrNoMem, Throws, ImmArg<0>]>; +Hexagon_float_i32_Intrinsic<"HEXAGON_F2_sfimm_n", [IntrNoMem, Throws, ImmArg>]>; def int_hexagon_F2_sffixupn : Hexagon_float_floatfloat_Intrinsic<"HEXAGON_F2_sffixupn", [IntrNoMem, Throws]>; @@ -2921,13 +2921,13 @@ def int_hexagon_F2_dfcmpuo : Hexagon_i32_doubledouble_Intrinsic<"HEXAGON_F2_dfcmpuo", [IntrNoMem, Throws]>; def int_hexagon_F2_dfclass : -Hexagon_i32_doublei32_Intrinsic<"HEXAGON_F2_dfclass", [IntrNoMem, Throws, ImmArg<1>]>; +Hexagon_i32_doublei32_Intrinsic<"HEXAGON_F2_dfclass", [IntrNoMem, Throws, ImmArg>]>; def int_hexagon_F2_dfimm_p : -Hexagon_double_i32_Intrinsic<"HEXAGON_F2_dfimm_p", [IntrNoMem, Throws, ImmArg<0>]>; +Hexagon_double_i32_Intrinsic<"HEXAGON_F2_dfimm_p", [IntrNoMem, Throws, ImmArg>]>; def int_hexagon_F2_dfimm_n : -Hexagon_double_i32_Intrinsic<"HEXAGON_F2_dfimm_n", [IntrNoMem, Throws, ImmArg<0>]>; +Hexagon_double_i32_Intrinsic<"HEXAGON_F2_dfimm_n", [IntrNoMem, Throws, ImmArg>]>; def int_hexagon_F2_conv_sf2df : Hexagon_double_float_Intrinsic<"HEXAGON_F2_conv_sf2df">; @@ -3146,160 +3146,160 @@ def int_hexagon_S2_asl_r_r_sat : 
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asl_r_r_sat">; def int_hexagon_S2_asr_i_r : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_i_r", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_i_r", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_lsr_i_r : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asl_i_r : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asl_i_r", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asl_i_r", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asr_i_p : -Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p", [IntrNoMem, ImmArg<1>]>; +Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_lsr_i_p : -Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p", [IntrNoMem, ImmArg<1>]>; +Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asl_i_p : -Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_i_p", [IntrNoMem, ImmArg<1>]>; +Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_i_p", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asr_i_r_acc : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_acc", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_acc", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_lsr_i_r_acc : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_acc", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_acc", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asl_i_r_acc : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_acc", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_acc", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asr_i_p_acc : -Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_acc", [IntrNoMem, ImmArg<2>]>; +Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_acc", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_lsr_i_p_acc : 
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_acc", [IntrNoMem, ImmArg<2>]>; +Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_acc", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asl_i_p_acc : -Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_acc", [IntrNoMem, ImmArg<2>]>; +Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_acc", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asr_i_r_nac : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_nac", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_nac", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_lsr_i_r_nac : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_nac", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_nac", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asl_i_r_nac : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_nac", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_nac", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asr_i_p_nac : -Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_nac", [IntrNoMem, ImmArg<2>]>; +Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_nac", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_lsr_i_p_nac : -Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_nac", [IntrNoMem, ImmArg<2>]>; +Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_nac", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asl_i_p_nac : -Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_nac", [IntrNoMem, ImmArg<2>]>; +Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_nac", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_lsr_i_r_xacc : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_xacc", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_xacc", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asl_i_r_xacc : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_xacc", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_xacc", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_lsr_i_p_xacc : 
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_xacc", [IntrNoMem, ImmArg<2>]>; +Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_xacc", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asl_i_p_xacc : -Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_xacc", [IntrNoMem, ImmArg<2>]>; +Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_xacc", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asr_i_r_and : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_and", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_and", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_lsr_i_r_and : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_and", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_and", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asl_i_r_and : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_and", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_and", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asr_i_r_or : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_or", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_or", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_lsr_i_r_or : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_or", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_or", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asl_i_r_or : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_or", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_or", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asr_i_p_and : -Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_and", [IntrNoMem, ImmArg<2>]>; +Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_and", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_lsr_i_p_and : -Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_and", [IntrNoMem, ImmArg<2>]>; +Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_and", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asl_i_p_and : 
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_and", [IntrNoMem, ImmArg<2>]>; +Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_and", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asr_i_p_or : -Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_or", [IntrNoMem, ImmArg<2>]>; +Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_or", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_lsr_i_p_or : -Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_or", [IntrNoMem, ImmArg<2>]>; +Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_or", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asl_i_p_or : -Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_or", [IntrNoMem, ImmArg<2>]>; +Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_or", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asl_i_r_sat : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_sat", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_sat", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asr_i_r_rnd : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_rnd", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_rnd", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asr_i_r_rnd_goodsyntax : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_rnd_goodsyntax", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_rnd_goodsyntax", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asr_i_p_rnd : -Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_rnd", [IntrNoMem, ImmArg<1>]>; +Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_rnd", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asr_i_p_rnd_goodsyntax : -Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_rnd_goodsyntax", [IntrNoMem, ImmArg<1>]>; +Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_rnd_goodsyntax", [IntrNoMem, ImmArg>]>; def int_hexagon_S4_lsli : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_lsli", [IntrNoMem, ImmArg<0>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_lsli", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_addasl_rrri : 
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_addasl_rrri", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_addasl_rrri", [IntrNoMem, ImmArg>]>; def int_hexagon_S4_andi_asl_ri : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_andi_asl_ri", [IntrNoMem, ImmArg<0>, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_andi_asl_ri", [IntrNoMem, ImmArg>, ImmArg>]>; def int_hexagon_S4_ori_asl_ri : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_ori_asl_ri", [IntrNoMem, ImmArg<0>, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_ori_asl_ri", [IntrNoMem, ImmArg>, ImmArg>]>; def int_hexagon_S4_addi_asl_ri : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_addi_asl_ri", [IntrNoMem, ImmArg<0>, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_addi_asl_ri", [IntrNoMem, ImmArg>, ImmArg>]>; def int_hexagon_S4_subi_asl_ri : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_subi_asl_ri", [IntrNoMem, ImmArg<0>, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_subi_asl_ri", [IntrNoMem, ImmArg>, ImmArg>]>; def int_hexagon_S4_andi_lsr_ri : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_andi_lsr_ri", [IntrNoMem, ImmArg<0>, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_andi_lsr_ri", [IntrNoMem, ImmArg>, ImmArg>]>; def int_hexagon_S4_ori_lsr_ri : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_ori_lsr_ri", [IntrNoMem, ImmArg<0>, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_ori_lsr_ri", [IntrNoMem, ImmArg>, ImmArg>]>; def int_hexagon_S4_addi_lsr_ri : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_addi_lsr_ri", [IntrNoMem, ImmArg<0>, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_addi_lsr_ri", [IntrNoMem, ImmArg>, ImmArg>]>; def int_hexagon_S4_subi_lsr_ri : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_subi_lsr_ri", [IntrNoMem, ImmArg<0>, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_subi_lsr_ri", [IntrNoMem, ImmArg>, ImmArg>]>; def int_hexagon_S2_valignib : -Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_valignib", 
[IntrNoMem, ImmArg<2>]>; +Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_valignib", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_valignrb : Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_valignrb">; def int_hexagon_S2_vspliceib : -Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_vspliceib", [IntrNoMem, ImmArg<2>]>; +Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_vspliceib", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_vsplicerb : Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_vsplicerb">; @@ -3311,40 +3311,40 @@ def int_hexagon_S2_vsplatrb : Hexagon_i32_i32_Intrinsic<"HEXAGON_S2_vsplatrb">; def int_hexagon_S2_insert : -Hexagon_i32_i32i32i32i32_Intrinsic<"HEXAGON_S2_insert", [IntrNoMem, ImmArg<2>, ImmArg<3>]>; +Hexagon_i32_i32i32i32i32_Intrinsic<"HEXAGON_S2_insert", [IntrNoMem, ImmArg>, ImmArg>]>; def int_hexagon_S2_tableidxb_goodsyntax : -Hexagon_i32_i32i32i32i32_Intrinsic<"HEXAGON_S2_tableidxb_goodsyntax", [IntrNoMem, ImmArg<2>, ImmArg<3>]>; +Hexagon_i32_i32i32i32i32_Intrinsic<"HEXAGON_S2_tableidxb_goodsyntax", [IntrNoMem, ImmArg>, ImmArg>]>; def int_hexagon_S2_tableidxh_goodsyntax : -Hexagon_i32_i32i32i32i32_Intrinsic<"HEXAGON_S2_tableidxh_goodsyntax", [IntrNoMem, ImmArg<2>, ImmArg<3>]>; +Hexagon_i32_i32i32i32i32_Intrinsic<"HEXAGON_S2_tableidxh_goodsyntax", [IntrNoMem, ImmArg>, ImmArg>]>; def int_hexagon_S2_tableidxw_goodsyntax : -Hexagon_i32_i32i32i32i32_Intrinsic<"HEXAGON_S2_tableidxw_goodsyntax", [IntrNoMem, ImmArg<2>, ImmArg<3>]>; +Hexagon_i32_i32i32i32i32_Intrinsic<"HEXAGON_S2_tableidxw_goodsyntax", [IntrNoMem, ImmArg>, ImmArg>]>; def int_hexagon_S2_tableidxd_goodsyntax : -Hexagon_i32_i32i32i32i32_Intrinsic<"HEXAGON_S2_tableidxd_goodsyntax", [IntrNoMem, ImmArg<2>, ImmArg<3>]>; +Hexagon_i32_i32i32i32i32_Intrinsic<"HEXAGON_S2_tableidxd_goodsyntax", [IntrNoMem, ImmArg>, ImmArg>]>; def int_hexagon_A4_bitspliti : -Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_bitspliti", [IntrNoMem, ImmArg<1>]>; +Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_bitspliti", [IntrNoMem, ImmArg>]>; def 
int_hexagon_A4_bitsplit : Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_bitsplit">; def int_hexagon_S4_extract : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_extract", [IntrNoMem, ImmArg<1>, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_extract", [IntrNoMem, ImmArg>, ImmArg>]>; def int_hexagon_S2_extractu : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_extractu", [IntrNoMem, ImmArg<1>, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_extractu", [IntrNoMem, ImmArg>, ImmArg>]>; def int_hexagon_S2_insertp : -Hexagon_i64_i64i64i32i32_Intrinsic<"HEXAGON_S2_insertp", [IntrNoMem, ImmArg<2>, ImmArg<3>]>; +Hexagon_i64_i64i64i32i32_Intrinsic<"HEXAGON_S2_insertp", [IntrNoMem, ImmArg>, ImmArg>]>; def int_hexagon_S4_extractp : -Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_S4_extractp", [IntrNoMem, ImmArg<1>, ImmArg<2>]>; +Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_S4_extractp", [IntrNoMem, ImmArg>, ImmArg>]>; def int_hexagon_S2_extractup : -Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_S2_extractup", [IntrNoMem, ImmArg<1>, ImmArg<2>]>; +Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_S2_extractup", [IntrNoMem, ImmArg>, ImmArg>]>; def int_hexagon_S2_insert_rp : Hexagon_i32_i32i32i64_Intrinsic<"HEXAGON_S2_insert_rp">; @@ -3365,19 +3365,19 @@ def int_hexagon_S2_extractup_rp : Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S2_extractup_rp">; def int_hexagon_S2_tstbit_i : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_tstbit_i", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_tstbit_i", [IntrNoMem, ImmArg>]>; def int_hexagon_S4_ntstbit_i : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_ntstbit_i", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_ntstbit_i", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_setbit_i : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_setbit_i", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_setbit_i", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_togglebit_i : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_togglebit_i", [IntrNoMem, 
ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_togglebit_i", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_clrbit_i : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_clrbit_i", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_clrbit_i", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_tstbit_r : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_tstbit_r">; @@ -3395,25 +3395,25 @@ def int_hexagon_S2_clrbit_r : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_clrbit_r">; def int_hexagon_S2_asr_i_vh : -Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_vh", [IntrNoMem, ImmArg<1>]>; +Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_vh", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_lsr_i_vh : -Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_i_vh", [IntrNoMem, ImmArg<1>]>; +Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_i_vh", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asl_i_vh : -Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_i_vh", [IntrNoMem, ImmArg<1>]>; +Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_i_vh", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asr_r_vh : Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_r_vh">; def int_hexagon_S5_asrhub_rnd_sat_goodsyntax : -Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S5_asrhub_rnd_sat_goodsyntax", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S5_asrhub_rnd_sat_goodsyntax", [IntrNoMem, ImmArg>]>; def int_hexagon_S5_asrhub_sat : -Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S5_asrhub_sat", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S5_asrhub_sat", [IntrNoMem, ImmArg>]>; def int_hexagon_S5_vasrhrnd_goodsyntax : -Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S5_vasrhrnd_goodsyntax", [IntrNoMem, ImmArg<1>]>; +Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S5_vasrhrnd_goodsyntax", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asl_r_vh : Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_r_vh">; @@ -3425,19 +3425,19 @@ def int_hexagon_S2_lsl_r_vh : Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsl_r_vh">; def int_hexagon_S2_asr_i_vw : 
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_vw", [IntrNoMem, ImmArg<1>]>; +Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_vw", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asr_i_svw_trun : -Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S2_asr_i_svw_trun", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S2_asr_i_svw_trun", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asr_r_svw_trun : Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S2_asr_r_svw_trun">; def int_hexagon_S2_lsr_i_vw : -Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_i_vw", [IntrNoMem, ImmArg<1>]>; +Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_i_vw", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asl_i_vw : -Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_i_vw", [IntrNoMem, ImmArg<1>]>; +Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_i_vw", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asr_r_vw : Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_r_vw">; @@ -3545,13 +3545,13 @@ def int_hexagon_S2_clbnorm : Hexagon_i32_i32_Intrinsic<"HEXAGON_S2_clbnorm">; def int_hexagon_S4_clbaddi : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_clbaddi", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_clbaddi", [IntrNoMem, ImmArg>]>; def int_hexagon_S4_clbpnorm : Hexagon_i32_i64_Intrinsic<"HEXAGON_S4_clbpnorm">; def int_hexagon_S4_clbpaddi : -Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S4_clbpaddi", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S4_clbpaddi", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_clb : Hexagon_i32_i32_Intrinsic<"HEXAGON_S2_clb">; @@ -3619,40 +3619,40 @@ Hexagon__ptri64_Intrinsic<"HEXAGON_Y5_l2fetch", []>; // V60 Scalar Instructions. 
def int_hexagon_S6_rol_i_r : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S6_rol_i_r", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S6_rol_i_r", [IntrNoMem, ImmArg>]>; def int_hexagon_S6_rol_i_p : -Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S6_rol_i_p", [IntrNoMem, ImmArg<1>]>; +Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S6_rol_i_p", [IntrNoMem, ImmArg>]>; def int_hexagon_S6_rol_i_r_acc : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_acc", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_acc", [IntrNoMem, ImmArg>]>; def int_hexagon_S6_rol_i_p_acc : -Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_acc", [IntrNoMem, ImmArg<2>]>; +Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_acc", [IntrNoMem, ImmArg>]>; def int_hexagon_S6_rol_i_r_nac : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_nac", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_nac", [IntrNoMem, ImmArg>]>; def int_hexagon_S6_rol_i_p_nac : -Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_nac", [IntrNoMem, ImmArg<2>]>; +Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_nac", [IntrNoMem, ImmArg>]>; def int_hexagon_S6_rol_i_r_xacc : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_xacc", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_xacc", [IntrNoMem, ImmArg>]>; def int_hexagon_S6_rol_i_p_xacc : -Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_xacc", [IntrNoMem, ImmArg<2>]>; +Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_xacc", [IntrNoMem, ImmArg>]>; def int_hexagon_S6_rol_i_r_and : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_and", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_and", [IntrNoMem, ImmArg>]>; def int_hexagon_S6_rol_i_r_or : -Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_or", [IntrNoMem, ImmArg<2>]>; +Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_or", [IntrNoMem, ImmArg>]>; def int_hexagon_S6_rol_i_p_and : 
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_and", [IntrNoMem, ImmArg<2>]>; +Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_and", [IntrNoMem, ImmArg>]>; def int_hexagon_S6_rol_i_p_or : -Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_or", [IntrNoMem, ImmArg<2>]>; +Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_or", [IntrNoMem, ImmArg>]>; // V62 Scalar Instructions. @@ -3688,7 +3688,7 @@ def int_hexagon_F2_dfsub : Hexagon_double_doubledouble_Intrinsic<"HEXAGON_F2_dfsub", [IntrNoMem, Throws]>; def int_hexagon_S2_mask : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_mask", [IntrNoMem, ImmArg<0>, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_mask", [IntrNoMem, ImmArg>, ImmArg>]>; // V67 Scalar Instructions. @@ -3747,16 +3747,16 @@ def int_hexagon_M7_wcmpyiwc_rnd : Hexagon_i32_i64i64_Intrinsic<"HEXAGON_M7_wcmpyiwc_rnd">; def int_hexagon_A7_croundd_ri : -Hexagon_i64_i64i32_Intrinsic<"HEXAGON_A7_croundd_ri", [IntrNoMem, ImmArg<1>]>; +Hexagon_i64_i64i32_Intrinsic<"HEXAGON_A7_croundd_ri", [IntrNoMem, ImmArg>]>; def int_hexagon_A7_croundd_rr : Hexagon_i64_i64i32_Intrinsic<"HEXAGON_A7_croundd_rr">; def int_hexagon_A7_clip : -Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A7_clip", [IntrNoMem, ImmArg<1>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A7_clip", [IntrNoMem, ImmArg>]>; def int_hexagon_A7_vclip : -Hexagon_i64_i64i32_Intrinsic<"HEXAGON_A7_vclip", [IntrNoMem, ImmArg<1>]>; +Hexagon_i64_i64i32_Intrinsic<"HEXAGON_A7_vclip", [IntrNoMem, ImmArg>]>; def int_hexagon_F2_dfmax : Hexagon_double_doubledouble_Intrinsic<"HEXAGON_F2_dfmax", [IntrNoMem, Throws]>; @@ -3815,16 +3815,16 @@ def int_hexagon_V6_vlalignb_128B : Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlalignb_128B">; def int_hexagon_V6_valignbi : -Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_valignbi", [IntrNoMem, ImmArg<2>]>; +Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_valignbi", [IntrNoMem, ImmArg>]>; def int_hexagon_V6_valignbi_128B : 
-Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_valignbi_128B", [IntrNoMem, ImmArg<2>]>; +Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_valignbi_128B", [IntrNoMem, ImmArg>]>; def int_hexagon_V6_vlalignbi : -Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlalignbi", [IntrNoMem, ImmArg<2>]>; +Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlalignbi", [IntrNoMem, ImmArg>]>; def int_hexagon_V6_vlalignbi_128B : -Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlalignbi_128B", [IntrNoMem, ImmArg<2>]>; +Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlalignbi_128B", [IntrNoMem, ImmArg>]>; def int_hexagon_V6_vror : Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vror">; @@ -4121,16 +4121,16 @@ def int_hexagon_V6_vrmpybv_acc_128B : Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vrmpybv_acc_128B">; def int_hexagon_V6_vrmpyubi : -Hexagon_v32i32_v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi", [IntrNoMem, ImmArg<2>]>; +Hexagon_v32i32_v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi", [IntrNoMem, ImmArg>]>; def int_hexagon_V6_vrmpyubi_128B : -Hexagon_v64i32_v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi_128B", [IntrNoMem, ImmArg<2>]>; +Hexagon_v64i32_v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi_128B", [IntrNoMem, ImmArg>]>; def int_hexagon_V6_vrmpyubi_acc : -Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi_acc", [IntrNoMem, ImmArg<3>]>; +Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi_acc", [IntrNoMem, ImmArg>]>; def int_hexagon_V6_vrmpyubi_acc_128B : -Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi_acc_128B", [IntrNoMem, ImmArg<3>]>; +Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi_acc_128B", [IntrNoMem, ImmArg>]>; def int_hexagon_V6_vrmpybus : Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vrmpybus">; @@ -4145,16 +4145,16 @@ def int_hexagon_V6_vrmpybus_acc_128B : Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vrmpybus_acc_128B">; def int_hexagon_V6_vrmpybusi : 
-Hexagon_v32i32_v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi", [IntrNoMem, ImmArg<2>]>; +Hexagon_v32i32_v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi", [IntrNoMem, ImmArg>]>; def int_hexagon_V6_vrmpybusi_128B : -Hexagon_v64i32_v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi_128B", [IntrNoMem, ImmArg<2>]>; +Hexagon_v64i32_v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi_128B", [IntrNoMem, ImmArg>]>; def int_hexagon_V6_vrmpybusi_acc : -Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi_acc", [IntrNoMem, ImmArg<3>]>; +Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi_acc", [IntrNoMem, ImmArg>]>; def int_hexagon_V6_vrmpybusi_acc_128B : -Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi_acc_128B", [IntrNoMem, ImmArg<3>]>; +Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi_acc_128B", [IntrNoMem, ImmArg>]>; def int_hexagon_V6_vrmpybusv : Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vrmpybusv">; @@ -4181,16 +4181,16 @@ def int_hexagon_V6_vdsaduh_acc_128B : Hexagon_v64i32_v64i32v64i32i32_Intrinsic<"HEXAGON_V6_vdsaduh_acc_128B">; def int_hexagon_V6_vrsadubi : -Hexagon_v32i32_v32i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi", [IntrNoMem, ImmArg<2>]>; +Hexagon_v32i32_v32i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi", [IntrNoMem, ImmArg>]>; def int_hexagon_V6_vrsadubi_128B : -Hexagon_v64i32_v64i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi_128B", [IntrNoMem, ImmArg<2>]>; +Hexagon_v64i32_v64i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi_128B", [IntrNoMem, ImmArg>]>; def int_hexagon_V6_vrsadubi_acc : -Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi_acc", [IntrNoMem, ImmArg<3>]>; +Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi_acc", [IntrNoMem, ImmArg>]>; def int_hexagon_V6_vrsadubi_acc_128B : -Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi_acc_128B", [IntrNoMem, ImmArg<3>]>; +Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi_acc_128B", [IntrNoMem, ImmArg>]>; def 
int_hexagon_V6_vasrw : Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vasrw">; @@ -5839,28 +5839,28 @@ def int_hexagon_V6_vaddclbh_128B : Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddclbh_128B">; def int_hexagon_V6_vlutvvbi : -Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvvbi", [IntrNoMem, ImmArg<2>]>; +Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvvbi", [IntrNoMem, ImmArg>]>; def int_hexagon_V6_vlutvvbi_128B : -Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvvbi_128B", [IntrNoMem, ImmArg<2>]>; +Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvvbi_128B", [IntrNoMem, ImmArg>]>; def int_hexagon_V6_vlutvvb_oracci : -Hexagon_v16i32_v16i32v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_oracci", [IntrNoMem, ImmArg<3>]>; +Hexagon_v16i32_v16i32v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_oracci", [IntrNoMem, ImmArg>]>; def int_hexagon_V6_vlutvvb_oracci_128B : -Hexagon_v32i32_v32i32v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_oracci_128B", [IntrNoMem, ImmArg<3>]>; +Hexagon_v32i32_v32i32v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_oracci_128B", [IntrNoMem, ImmArg>]>; def int_hexagon_V6_vlutvwhi : -Hexagon_v32i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvwhi", [IntrNoMem, ImmArg<2>]>; +Hexagon_v32i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvwhi", [IntrNoMem, ImmArg>]>; def int_hexagon_V6_vlutvwhi_128B : -Hexagon_v64i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvwhi_128B", [IntrNoMem, ImmArg<2>]>; +Hexagon_v64i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvwhi_128B", [IntrNoMem, ImmArg>]>; def int_hexagon_V6_vlutvwh_oracci : -Hexagon_v32i32_v32i32v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvwh_oracci", [IntrNoMem, ImmArg<3>]>; +Hexagon_v32i32_v32i32v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvwh_oracci", [IntrNoMem, ImmArg>]>; def int_hexagon_V6_vlutvwh_oracci_128B : -Hexagon_v64i32_v64i32v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvwh_oracci_128B", [IntrNoMem, ImmArg<3>]>; 
+Hexagon_v64i32_v64i32v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvwh_oracci_128B", [IntrNoMem, ImmArg>]>; def int_hexagon_V6_vlutvvb_nm : Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_nm">; diff --git a/llvm/include/llvm/IR/IntrinsicsMips.td b/llvm/include/llvm/IR/IntrinsicsMips.td index 9f9d6d78abea86..271142ca7788fa 100644 --- a/llvm/include/llvm/IR/IntrinsicsMips.td +++ b/llvm/include/llvm/IR/IntrinsicsMips.td @@ -234,9 +234,9 @@ def int_mips_extpdp: GCCBuiltin<"__builtin_mips_extpdp">, // Misc def int_mips_wrdsp: GCCBuiltin<"__builtin_mips_wrdsp">, - Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<1>]>; + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [ImmArg>]>; def int_mips_rddsp: GCCBuiltin<"__builtin_mips_rddsp">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrReadMem, ImmArg<0>]>; + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrReadMem, ImmArg>]>; def int_mips_insv: GCCBuiltin<"__builtin_mips_insv">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>; @@ -302,10 +302,10 @@ def int_mips_adduh_r_qb: GCCBuiltin<"__builtin_mips_adduh_r_qb">, def int_mips_append: GCCBuiltin<"__builtin_mips_append">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_mips_balign: GCCBuiltin<"__builtin_mips_balign">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_mips_cmpgdu_eq_qb: GCCBuiltin<"__builtin_mips_cmpgdu_eq_qb">, Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>; @@ -355,14 +355,14 @@ def int_mips_precr_qb_ph: GCCBuiltin<"__builtin_mips_precr_qb_ph">, Intrinsic<[llvm_v4i8_ty], [llvm_v2i16_ty, llvm_v2i16_ty], []>; def int_mips_precr_sra_ph_w: GCCBuiltin<"__builtin_mips_precr_sra_ph_w">, Intrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_mips_precr_sra_r_ph_w: 
GCCBuiltin<"__builtin_mips_precr_sra_r_ph_w">, Intrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_mips_prepend: GCCBuiltin<"__builtin_mips_prepend">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_mips_shra_qb: GCCBuiltin<"__builtin_mips_shra_qb">, Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_i32_ty], [IntrNoMem]>; @@ -463,22 +463,22 @@ def int_mips_addv_d : GCCBuiltin<"__builtin_msa_addv_d">, def int_mips_addvi_b : GCCBuiltin<"__builtin_msa_addvi_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], - [Commutative, IntrNoMem, ImmArg<1>]>; + [Commutative, IntrNoMem, ImmArg>]>; def int_mips_addvi_h : GCCBuiltin<"__builtin_msa_addvi_h">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], - [Commutative, IntrNoMem, ImmArg<1>]>; + [Commutative, IntrNoMem, ImmArg>]>; def int_mips_addvi_w : GCCBuiltin<"__builtin_msa_addvi_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], - [Commutative, IntrNoMem, ImmArg<1>]>; + [Commutative, IntrNoMem, ImmArg>]>; def int_mips_addvi_d : GCCBuiltin<"__builtin_msa_addvi_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], - [Commutative, IntrNoMem, ImmArg<1>]>; + [Commutative, IntrNoMem, ImmArg>]>; def int_mips_and_v : GCCBuiltin<"__builtin_msa_and_v">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; def int_mips_andi_b : GCCBuiltin<"__builtin_msa_andi_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_asub_s_b : GCCBuiltin<"__builtin_msa_asub_s_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; @@ -560,13 +560,13 @@ def int_mips_bclr_d : GCCBuiltin<"__builtin_msa_bclr_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_mips_bclri_b : 
GCCBuiltin<"__builtin_msa_bclri_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_bclri_h : GCCBuiltin<"__builtin_msa_bclri_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_bclri_w : GCCBuiltin<"__builtin_msa_bclri_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_bclri_d : GCCBuiltin<"__builtin_msa_bclri_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_binsl_b : GCCBuiltin<"__builtin_msa_binsl_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], @@ -583,16 +583,16 @@ def int_mips_binsl_d : GCCBuiltin<"__builtin_msa_binsl_d">, def int_mips_binsli_b : GCCBuiltin<"__builtin_msa_binsli_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_mips_binsli_h : GCCBuiltin<"__builtin_msa_binsli_h">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_mips_binsli_w : GCCBuiltin<"__builtin_msa_binsli_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_mips_binsli_d : GCCBuiltin<"__builtin_msa_binsli_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_mips_binsr_b : GCCBuiltin<"__builtin_msa_binsr_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], @@ -609,16 +609,16 @@ 
def int_mips_binsr_d : GCCBuiltin<"__builtin_msa_binsr_d">, def int_mips_binsri_b : GCCBuiltin<"__builtin_msa_binsri_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_mips_binsri_h : GCCBuiltin<"__builtin_msa_binsri_h">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_mips_binsri_w : GCCBuiltin<"__builtin_msa_binsri_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_mips_binsri_d : GCCBuiltin<"__builtin_msa_binsri_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_mips_bmnz_v : GCCBuiltin<"__builtin_msa_bmnz_v">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], @@ -626,7 +626,7 @@ def int_mips_bmnz_v : GCCBuiltin<"__builtin_msa_bmnz_v">, def int_mips_bmnzi_b : GCCBuiltin<"__builtin_msa_bmnzi_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_mips_bmz_v : GCCBuiltin<"__builtin_msa_bmz_v">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], @@ -634,7 +634,7 @@ def int_mips_bmz_v : GCCBuiltin<"__builtin_msa_bmz_v">, def int_mips_bmzi_b : GCCBuiltin<"__builtin_msa_bmzi_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_mips_bneg_b : GCCBuiltin<"__builtin_msa_bneg_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; @@ -646,13 +646,13 @@ def int_mips_bneg_d : GCCBuiltin<"__builtin_msa_bneg_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_mips_bnegi_b : GCCBuiltin<"__builtin_msa_bnegi_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], 
[IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_bnegi_h : GCCBuiltin<"__builtin_msa_bnegi_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_bnegi_w : GCCBuiltin<"__builtin_msa_bnegi_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_bnegi_d : GCCBuiltin<"__builtin_msa_bnegi_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_bnz_b : GCCBuiltin<"__builtin_msa_bnz_b">, Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; @@ -672,7 +672,7 @@ def int_mips_bsel_v : GCCBuiltin<"__builtin_msa_bsel_v">, def int_mips_bseli_b : GCCBuiltin<"__builtin_msa_bseli_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_mips_bset_b : GCCBuiltin<"__builtin_msa_bset_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; @@ -684,13 +684,13 @@ def int_mips_bset_d : GCCBuiltin<"__builtin_msa_bset_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_mips_bseti_b : GCCBuiltin<"__builtin_msa_bseti_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_bseti_h : GCCBuiltin<"__builtin_msa_bseti_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_bseti_w : GCCBuiltin<"__builtin_msa_bseti_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, 
llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_bseti_d : GCCBuiltin<"__builtin_msa_bseti_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_bz_b : GCCBuiltin<"__builtin_msa_bz_b">, Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; @@ -714,16 +714,16 @@ def int_mips_ceq_d : GCCBuiltin<"__builtin_msa_ceq_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_mips_ceqi_b : GCCBuiltin<"__builtin_msa_ceqi_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_ceqi_h : GCCBuiltin<"__builtin_msa_ceqi_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_ceqi_w : GCCBuiltin<"__builtin_msa_ceqi_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_ceqi_d : GCCBuiltin<"__builtin_msa_ceqi_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_cfcmsa : GCCBuiltin<"__builtin_msa_cfcmsa">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [ImmArg<0>]>; + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [ImmArg>]>; def int_mips_cle_s_b : GCCBuiltin<"__builtin_msa_cle_s_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; @@ -744,22 +744,22 @@ def int_mips_cle_u_d : GCCBuiltin<"__builtin_msa_cle_u_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_mips_clei_s_b : 
GCCBuiltin<"__builtin_msa_clei_s_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_clei_s_h : GCCBuiltin<"__builtin_msa_clei_s_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_clei_s_w : GCCBuiltin<"__builtin_msa_clei_s_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_clei_s_d : GCCBuiltin<"__builtin_msa_clei_s_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_clei_u_b : GCCBuiltin<"__builtin_msa_clei_u_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_clei_u_h : GCCBuiltin<"__builtin_msa_clei_u_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_clei_u_w : GCCBuiltin<"__builtin_msa_clei_u_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_clei_u_d : GCCBuiltin<"__builtin_msa_clei_u_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_clt_s_b : GCCBuiltin<"__builtin_msa_clt_s_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; @@ -780,22 +780,22 @@ def int_mips_clt_u_d : 
GCCBuiltin<"__builtin_msa_clt_u_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_mips_clti_s_b : GCCBuiltin<"__builtin_msa_clti_s_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_clti_s_h : GCCBuiltin<"__builtin_msa_clti_s_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_clti_s_w : GCCBuiltin<"__builtin_msa_clti_s_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_clti_s_d : GCCBuiltin<"__builtin_msa_clti_s_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_clti_u_b : GCCBuiltin<"__builtin_msa_clti_u_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_clti_u_h : GCCBuiltin<"__builtin_msa_clti_u_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_clti_u_w : GCCBuiltin<"__builtin_msa_clti_u_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_clti_u_d : GCCBuiltin<"__builtin_msa_clti_u_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_copy_s_b : GCCBuiltin<"__builtin_msa_copy_s_b">, 
Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; @@ -816,7 +816,7 @@ def int_mips_copy_u_d : GCCBuiltin<"__builtin_msa_copy_u_d">, Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; def int_mips_ctcmsa : GCCBuiltin<"__builtin_msa_ctcmsa">, - Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>]>; + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [ImmArg>]>; def int_mips_div_s_b : GCCBuiltin<"__builtin_msa_div_s_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; @@ -1244,19 +1244,19 @@ def int_mips_insert_d : GCCBuiltin<"__builtin_msa_insert_d">, def int_mips_insve_b : GCCBuiltin<"__builtin_msa_insve_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_mips_insve_h : GCCBuiltin<"__builtin_msa_insve_h">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty, llvm_v8i16_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_mips_insve_w : GCCBuiltin<"__builtin_msa_insve_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_mips_insve_d : GCCBuiltin<"__builtin_msa_insve_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty, llvm_v2i64_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_mips_ld_b : GCCBuiltin<"__builtin_msa_ld_b">, Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], @@ -1279,13 +1279,13 @@ def int_mips_ldr_w : GCCBuiltin<"__builtin_msa_ldr_w">, [IntrReadMem, IntrArgMemOnly]>; def int_mips_ldi_b : GCCBuiltin<"__builtin_msa_ldi_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>; + Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_ldi_h : GCCBuiltin<"__builtin_msa_ldi_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>; + Intrinsic<[llvm_v8i16_ty], [llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_ldi_w : 
GCCBuiltin<"__builtin_msa_ldi_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>; + Intrinsic<[llvm_v4i32_ty], [llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_ldi_d : GCCBuiltin<"__builtin_msa_ldi_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>; + Intrinsic<[llvm_v2i64_ty], [llvm_i32_ty], [IntrNoMem, ImmArg>]>; // This instruction is part of the MSA spec but it does not share the // __builtin_msa prefix because it operates on the GPR registers. @@ -1348,22 +1348,22 @@ def int_mips_max_u_d : GCCBuiltin<"__builtin_msa_max_u_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_mips_maxi_s_b : GCCBuiltin<"__builtin_msa_maxi_s_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_maxi_s_h : GCCBuiltin<"__builtin_msa_maxi_s_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_maxi_s_w : GCCBuiltin<"__builtin_msa_maxi_s_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_maxi_s_d : GCCBuiltin<"__builtin_msa_maxi_s_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_maxi_u_b : GCCBuiltin<"__builtin_msa_maxi_u_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_maxi_u_h : GCCBuiltin<"__builtin_msa_maxi_u_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, 
llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_maxi_u_w : GCCBuiltin<"__builtin_msa_maxi_u_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_maxi_u_d : GCCBuiltin<"__builtin_msa_maxi_u_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_min_a_b : GCCBuiltin<"__builtin_msa_min_a_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; @@ -1393,22 +1393,22 @@ def int_mips_min_u_d : GCCBuiltin<"__builtin_msa_min_u_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_mips_mini_s_b : GCCBuiltin<"__builtin_msa_mini_s_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_mini_s_h : GCCBuiltin<"__builtin_msa_mini_s_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_mini_s_w : GCCBuiltin<"__builtin_msa_mini_s_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_mini_s_d : GCCBuiltin<"__builtin_msa_mini_s_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_mini_u_b : GCCBuiltin<"__builtin_msa_mini_u_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_mini_u_h : GCCBuiltin<"__builtin_msa_mini_u_h">, - 
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_mini_u_w : GCCBuiltin<"__builtin_msa_mini_u_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_mini_u_d : GCCBuiltin<"__builtin_msa_mini_u_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_mod_s_b : GCCBuiltin<"__builtin_msa_mod_s_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; @@ -1499,13 +1499,13 @@ def int_mips_nor_v : GCCBuiltin<"__builtin_msa_nor_v">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; def int_mips_nori_b : GCCBuiltin<"__builtin_msa_nori_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_or_v : GCCBuiltin<"__builtin_msa_or_v">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; def int_mips_ori_b : GCCBuiltin<"__builtin_msa_ori_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_pckev_b : GCCBuiltin<"__builtin_msa_pckev_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; @@ -1535,29 +1535,29 @@ def int_mips_pcnt_d : GCCBuiltin<"__builtin_msa_pcnt_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; def int_mips_sat_s_b : GCCBuiltin<"__builtin_msa_sat_s_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_sat_s_h 
: GCCBuiltin<"__builtin_msa_sat_s_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_sat_s_w : GCCBuiltin<"__builtin_msa_sat_s_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_sat_s_d : GCCBuiltin<"__builtin_msa_sat_s_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_sat_u_b : GCCBuiltin<"__builtin_msa_sat_u_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_sat_u_h : GCCBuiltin<"__builtin_msa_sat_u_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_sat_u_w : GCCBuiltin<"__builtin_msa_sat_u_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_sat_u_d : GCCBuiltin<"__builtin_msa_sat_u_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_shf_b : GCCBuiltin<"__builtin_msa_shf_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_shf_h : GCCBuiltin<"__builtin_msa_shf_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, 
ImmArg>]>; def int_mips_shf_w : GCCBuiltin<"__builtin_msa_shf_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_sld_b : GCCBuiltin<"__builtin_msa_sld_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; @@ -1570,16 +1570,16 @@ def int_mips_sld_d : GCCBuiltin<"__builtin_msa_sld_d">, def int_mips_sldi_b : GCCBuiltin<"__builtin_msa_sldi_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_mips_sldi_h : GCCBuiltin<"__builtin_msa_sldi_h">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_mips_sldi_w : GCCBuiltin<"__builtin_msa_sldi_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_mips_sldi_d : GCCBuiltin<"__builtin_msa_sldi_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_mips_sll_b : GCCBuiltin<"__builtin_msa_sll_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; @@ -1591,13 +1591,13 @@ def int_mips_sll_d : GCCBuiltin<"__builtin_msa_sll_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_mips_slli_b : GCCBuiltin<"__builtin_msa_slli_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_slli_h : GCCBuiltin<"__builtin_msa_slli_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_slli_w : GCCBuiltin<"__builtin_msa_slli_w">, - Intrinsic<[llvm_v4i32_ty], 
[llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_slli_d : GCCBuiltin<"__builtin_msa_slli_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_splat_b : GCCBuiltin<"__builtin_msa_splat_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; @@ -1609,13 +1609,13 @@ def int_mips_splat_d : GCCBuiltin<"__builtin_msa_splat_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; def int_mips_splati_b : GCCBuiltin<"__builtin_msa_splati_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_splati_h : GCCBuiltin<"__builtin_msa_splati_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_splati_w : GCCBuiltin<"__builtin_msa_splati_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_splati_d : GCCBuiltin<"__builtin_msa_splati_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_sra_b : GCCBuiltin<"__builtin_msa_sra_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; @@ -1627,13 +1627,13 @@ def int_mips_sra_d : GCCBuiltin<"__builtin_msa_sra_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_mips_srai_b : GCCBuiltin<"__builtin_msa_srai_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + 
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_srai_h : GCCBuiltin<"__builtin_msa_srai_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_srai_w : GCCBuiltin<"__builtin_msa_srai_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_srai_d : GCCBuiltin<"__builtin_msa_srai_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_srar_b : GCCBuiltin<"__builtin_msa_srar_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; @@ -1645,13 +1645,13 @@ def int_mips_srar_d : GCCBuiltin<"__builtin_msa_srar_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_mips_srari_b : GCCBuiltin<"__builtin_msa_srari_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_srari_h : GCCBuiltin<"__builtin_msa_srari_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_srari_w : GCCBuiltin<"__builtin_msa_srari_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_srari_d : GCCBuiltin<"__builtin_msa_srari_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_srl_b : GCCBuiltin<"__builtin_msa_srl_b">, 
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; @@ -1663,13 +1663,13 @@ def int_mips_srl_d : GCCBuiltin<"__builtin_msa_srl_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_mips_srli_b : GCCBuiltin<"__builtin_msa_srli_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_srli_h : GCCBuiltin<"__builtin_msa_srli_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_srli_w : GCCBuiltin<"__builtin_msa_srli_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_srli_d : GCCBuiltin<"__builtin_msa_srli_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_srlr_b : GCCBuiltin<"__builtin_msa_srlr_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; @@ -1681,13 +1681,13 @@ def int_mips_srlr_d : GCCBuiltin<"__builtin_msa_srlr_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_mips_srlri_b : GCCBuiltin<"__builtin_msa_srlri_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_srlri_h : GCCBuiltin<"__builtin_msa_srlri_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_srlri_w : GCCBuiltin<"__builtin_msa_srlri_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, 
ImmArg<1>]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_srlri_d : GCCBuiltin<"__builtin_msa_srlri_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_st_b : GCCBuiltin<"__builtin_msa_st_b">, Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty], @@ -1755,13 +1755,13 @@ def int_mips_subv_d : GCCBuiltin<"__builtin_msa_subv_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_mips_subvi_b : GCCBuiltin<"__builtin_msa_subvi_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_subvi_h : GCCBuiltin<"__builtin_msa_subvi_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_subvi_w : GCCBuiltin<"__builtin_msa_subvi_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_subvi_d : GCCBuiltin<"__builtin_msa_subvi_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_mips_vshf_b : GCCBuiltin<"__builtin_msa_vshf_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], @@ -1780,5 +1780,5 @@ def int_mips_xor_v : GCCBuiltin<"__builtin_msa_xor_v">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; def int_mips_xori_b : GCCBuiltin<"__builtin_msa_xori_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; } 
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index ec328d69a8dd94..61293418ec41d7 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -978,20 +978,20 @@ let TargetPrefix = "nvvm" in { // Atomics not available as llvm intrinsics. def int_nvvm_atomic_load_inc_32 : Intrinsic<[llvm_i32_ty], [LLVMAnyPointerType, llvm_i32_ty], - [IntrArgMemOnly, NoCapture<0>]>; + [IntrArgMemOnly, NoCapture>]>; def int_nvvm_atomic_load_dec_32 : Intrinsic<[llvm_i32_ty], [LLVMAnyPointerType, llvm_i32_ty], - [IntrArgMemOnly, NoCapture<0>]>; + [IntrArgMemOnly, NoCapture>]>; class SCOPED_ATOMIC2_impl : Intrinsic<[elty], [LLVMAnyPointerType>, LLVMMatchType<0>], - [IntrArgMemOnly, NoCapture<0>]>; + [IntrArgMemOnly, NoCapture>]>; class SCOPED_ATOMIC3_impl : Intrinsic<[elty], [LLVMAnyPointerType>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrArgMemOnly, NoCapture<0>]>; + [IntrArgMemOnly, NoCapture>]>; multiclass PTXAtomicWithScope2 { def _cta : SCOPED_ATOMIC2_impl; @@ -1063,30 +1063,30 @@ let TargetPrefix = "nvvm" in { // pointer's alignment. def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty], [LLVMAnyPointerType>, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, NoCapture<0>], + [IntrReadMem, IntrArgMemOnly, NoCapture>], "llvm.nvvm.ldu.global.i">; def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty], [LLVMAnyPointerType>, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, NoCapture<0>], + [IntrReadMem, IntrArgMemOnly, NoCapture>], "llvm.nvvm.ldu.global.f">; def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty], [LLVMAnyPointerType>, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, NoCapture<0>], + [IntrReadMem, IntrArgMemOnly, NoCapture>], "llvm.nvvm.ldu.global.p">; // Generated within nvvm. Use for ldg on sm_35 or later. Second arg is the // pointer's alignment. 
def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty], [LLVMAnyPointerType>, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, NoCapture<0>], + [IntrReadMem, IntrArgMemOnly, NoCapture>], "llvm.nvvm.ldg.global.i">; def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty], [LLVMAnyPointerType>, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, NoCapture<0>], + [IntrReadMem, IntrArgMemOnly, NoCapture>], "llvm.nvvm.ldg.global.f">; def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty], [LLVMAnyPointerType>, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, NoCapture<0>], + [IntrReadMem, IntrArgMemOnly, NoCapture>], "llvm.nvvm.ldg.global.p">; // Use for generic pointers @@ -1143,7 +1143,7 @@ def int_nvvm_move_float : Intrinsic<[llvm_float_ty], [llvm_float_ty], def int_nvvm_move_double : Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem], "llvm.nvvm.move.double">; def int_nvvm_move_ptr : Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty], - [IntrNoMem, NoCapture<0>], "llvm.nvvm.move.ptr">; + [IntrNoMem, NoCapture>], "llvm.nvvm.move.ptr">; // For getting the handle from a texture or surface variable @@ -4110,7 +4110,7 @@ def int_nvvm_match_all_sync_i64p : class NVVM_WMMA_LD : Intrinsic, NoCapture<0>], + [IntrReadMem, IntrArgMemOnly, ReadOnly>, NoCapture>], WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.intr>; // WMMA.STORE.D @@ -4120,7 +4120,7 @@ class NVVM_WMMA_ST [llvm_anyptr_ty], Frag.regs, !if(WithStride, [llvm_i32_ty], [])), - [IntrWriteMem, IntrArgMemOnly, WriteOnly<0>, NoCapture<0>], + [IntrWriteMem, IntrArgMemOnly, WriteOnly>, NoCapture>], WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.intr>; // Create all load/store variants diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index 2b3fcd7f234163..c23f04f7105954 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -25,9 +25,9 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". 
def int_ppc_dcbi : Intrinsic<[], [llvm_ptr_ty], []>; def int_ppc_dcbst : Intrinsic<[], [llvm_ptr_ty], []>; def int_ppc_dcbt : Intrinsic<[], [llvm_ptr_ty], - [IntrArgMemOnly, NoCapture<0>]>; + [IntrArgMemOnly, NoCapture>]>; def int_ppc_dcbtst: Intrinsic<[], [llvm_ptr_ty], - [IntrArgMemOnly, NoCapture<0>]>; + [IntrArgMemOnly, NoCapture>]>; def int_ppc_dcbz : Intrinsic<[], [llvm_ptr_ty], []>; def int_ppc_dcbzl : Intrinsic<[], [llvm_ptr_ty], []>; @@ -620,16 +620,16 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". // FP <-> integer conversion. def int_ppc_altivec_vcfsx : GCCBuiltin<"__builtin_altivec_vcfsx">, Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_ppc_altivec_vcfux : GCCBuiltin<"__builtin_altivec_vcfux">, Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_ppc_altivec_vctsxs : GCCBuiltin<"__builtin_altivec_vctsxs">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_ppc_altivec_vctuxs : GCCBuiltin<"__builtin_altivec_vctuxs">, Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_ppc_altivec_vrfim : GCCBuiltin<"__builtin_altivec_vrfim">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; @@ -726,11 +726,11 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". 
def int_ppc_altivec_crypto_vshasigmad : GCCBuiltin<"__builtin_altivec_crypto_vshasigmad">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, - llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>, ImmArg<2>]>; + llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; def int_ppc_altivec_crypto_vshasigmaw : GCCBuiltin<"__builtin_altivec_crypto_vshasigmaw">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, - llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>, ImmArg<2>]>; + llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; } def int_ppc_altivec_crypto_vcipher : PowerPC_Vec_DDD_Intrinsic<"crypto_vcipher">; @@ -925,10 +925,10 @@ def int_ppc_vsx_xvxsigsp : [llvm_v4f32_ty], [IntrNoMem]>; def int_ppc_vsx_xvtstdcdp : PowerPC_VSX_Intrinsic<"xvtstdcdp", [llvm_v2i64_ty], - [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_ppc_vsx_xvtstdcsp : PowerPC_VSX_Intrinsic<"xvtstdcsp", [llvm_v4i32_ty], - [llvm_v4f32_ty,llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + [llvm_v4f32_ty,llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_ppc_vsx_xvcvhpsp : PowerPC_VSX_Intrinsic<"xvcvhpsp", [llvm_v4f32_ty], [llvm_v8i16_ty],[IntrNoMem]>; @@ -1123,9 +1123,9 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". 
def int_ppc_tbegin : GCCBuiltin<"__builtin_tbegin">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [ImmArg<0>]>; + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [ImmArg>]>; def int_ppc_tend : GCCBuiltin<"__builtin_tend">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [ImmArg<0>]>; + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [ImmArg>]>; def int_ppc_tabort : GCCBuiltin<"__builtin_tabort">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index 2039ad1a26b888..7590b568c367b1 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -28,11 +28,11 @@ let TargetPrefix = "riscv" in { // T @llvm..T.

(any*, T, T, T imm); class MaskedAtomicRMWFourArg : Intrinsic<[itype], [llvm_anyptr_ty, itype, itype, itype], - [IntrArgMemOnly, NoCapture<0>, ImmArg<3>]>; + [IntrArgMemOnly, NoCapture>, ImmArg>]>; // T @llvm..T.

(any*, T, T, T, T imm); class MaskedAtomicRMWFiveArg : Intrinsic<[itype], [llvm_anyptr_ty, itype, itype, itype, itype], - [IntrArgMemOnly, NoCapture<0>, ImmArg<4>]>; + [IntrArgMemOnly, NoCapture>, ImmArg>]>; // We define 32-bit and 64-bit variants of the above, where T stands for i32 // or i64 respectively: diff --git a/llvm/include/llvm/IR/IntrinsicsSystemZ.td b/llvm/include/llvm/IR/IntrinsicsSystemZ.td index dd156a3dc3b606..b0c5cf0148fe5a 100644 --- a/llvm/include/llvm/IR/IntrinsicsSystemZ.td +++ b/llvm/include/llvm/IR/IntrinsicsSystemZ.td @@ -39,7 +39,7 @@ class SystemZBinaryConvCC class SystemZBinaryConvIntCC : Intrinsic<[result, llvm_i32_ty], [arg, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; class SystemZBinaryCC : SystemZBinaryConvCC; @@ -56,20 +56,20 @@ class SystemZTernary class SystemZTernaryInt : GCCBuiltin<"__builtin_s390_" # name>, - Intrinsic<[type], [type, type, llvm_i32_ty], [IntrNoMem, ImmArg<2>]>; + Intrinsic<[type], [type, type, llvm_i32_ty], [IntrNoMem, ImmArg>]>; class SystemZTernaryIntCC : Intrinsic<[type, llvm_i32_ty], [type, type, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; class SystemZQuaternaryInt : GCCBuiltin<"__builtin_s390_" # name>, Intrinsic<[type], [type, type, type, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; class SystemZQuaternaryIntCC : Intrinsic<[type, llvm_i32_ty], [type, type, type, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; multiclass SystemZUnaryExtBHF { def b : SystemZUnaryConv; @@ -238,11 +238,11 @@ let TargetPrefix = "s390" in { let TargetPrefix = "s390" in { def int_s390_lcbb : GCCBuiltin<"__builtin_s390_lcbb">, Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_s390_vlbb : GCCBuiltin<"__builtin_s390_vlbb">, Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>; + [IntrReadMem, IntrArgMemOnly, ImmArg>]>; def int_s390_vll : 
GCCBuiltin<"__builtin_s390_vll">, Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty, llvm_ptr_ty], @@ -251,7 +251,7 @@ let TargetPrefix = "s390" in { def int_s390_vpdi : GCCBuiltin<"__builtin_s390_vpdi">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_s390_vperm : GCCBuiltin<"__builtin_s390_vperm">, Intrinsic<[llvm_v16i8_ty], @@ -317,7 +317,7 @@ let TargetPrefix = "s390" in { def int_s390_vsldb : GCCBuiltin<"__builtin_s390_vsldb">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; defm int_s390_vscbi : SystemZBinaryBHFG<"vscbi">; @@ -376,7 +376,7 @@ let TargetPrefix = "s390" in { def int_s390_vfidb : Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>, ImmArg<2>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; // Instructions from the Vector Enhancements Facility 1 def int_s390_vbperm : SystemZBinaryConv<"vbperm", llvm_v2i64_ty, @@ -385,20 +385,20 @@ let TargetPrefix = "s390" in { def int_s390_vmslg : GCCBuiltin<"__builtin_s390_vmslg">, Intrinsic<[llvm_v16i8_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v16i8_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<3>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_s390_vfmaxdb : Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_s390_vfmindb : Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_s390_vfmaxsb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_s390_vfminsb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_s390_vfcesbs : SystemZBinaryConvCC; def int_s390_vfchsbs : SystemZBinaryConvCC; @@ -408,7 +408,7 @@ let TargetPrefix = 
"s390" in { def int_s390_vfisb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>, ImmArg<2>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; // Instructions from the Vector Packed Decimal Facility def int_s390_vlrl : GCCBuiltin<"__builtin_s390_vlrl">, @@ -423,12 +423,12 @@ let TargetPrefix = "s390" in { def int_s390_vsld : GCCBuiltin<"__builtin_s390_vsld">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_s390_vsrd : GCCBuiltin<"__builtin_s390_vsrd">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_s390_vstrsb : SystemZTernaryConvCC; def int_s390_vstrsh : SystemZTernaryConvCC; diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td index babe42f8631915..97bd76e49f6d8d 100644 --- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -51,7 +51,7 @@ def int_wasm_trunc_saturate_unsigned : Intrinsic<[llvm_anyint_ty], // throw / rethrow def int_wasm_throw : Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], - [Throws, IntrNoReturn, ImmArg<0>]>; + [Throws, IntrNoReturn, ImmArg>]>; def int_wasm_rethrow_in_catch : Intrinsic<[], [], [Throws, IntrNoReturn]>; // Since wasm does not use landingpad instructions, these instructions return @@ -69,7 +69,7 @@ def int_wasm_extract_exception : Intrinsic<[llvm_ptr_ty], [], // by WasmEHPrepare pass to generate landingpad table in EHStreamer. This is // used in order to give them the indices in WasmEHPrepare. def int_wasm_landingpad_index: Intrinsic<[], [llvm_token_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; // Returns LSDA address of the current function. 
def int_wasm_lsda : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; @@ -82,18 +82,18 @@ def int_wasm_lsda : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; def int_wasm_atomic_wait_i32 : Intrinsic<[llvm_i32_ty], [LLVMPointerType, llvm_i32_ty, llvm_i64_ty], - [IntrInaccessibleMemOrArgMemOnly, ReadOnly<0>, NoCapture<0>, + [IntrInaccessibleMemOrArgMemOnly, ReadOnly>, NoCapture>, IntrHasSideEffects], "", [SDNPMemOperand]>; def int_wasm_atomic_wait_i64 : Intrinsic<[llvm_i32_ty], [LLVMPointerType, llvm_i64_ty, llvm_i64_ty], - [IntrInaccessibleMemOrArgMemOnly, ReadOnly<0>, NoCapture<0>, + [IntrInaccessibleMemOrArgMemOnly, ReadOnly>, NoCapture>, IntrHasSideEffects], "", [SDNPMemOperand]>; def int_wasm_atomic_notify: Intrinsic<[llvm_i32_ty], [LLVMPointerType, llvm_i32_ty], - [IntrInaccessibleMemOnly, NoCapture<0>, IntrHasSideEffects], "", + [IntrInaccessibleMemOnly, NoCapture>, IntrHasSideEffects], "", [SDNPMemOperand]>; //===----------------------------------------------------------------------===// @@ -194,12 +194,12 @@ def int_wasm_pmax : def int_wasm_memory_init : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], - [IntrWriteMem, IntrInaccessibleMemOrArgMemOnly, WriteOnly<2>, - IntrHasSideEffects, ImmArg<0>, ImmArg<1>]>; + [IntrWriteMem, IntrInaccessibleMemOrArgMemOnly, WriteOnly>, + IntrHasSideEffects, ImmArg>, ImmArg>]>; def int_wasm_data_drop : Intrinsic<[], [llvm_i32_ty], - [IntrNoDuplicate, IntrHasSideEffects, ImmArg<0>]>; + [IntrNoDuplicate, IntrHasSideEffects, ImmArg>]>; //===----------------------------------------------------------------------===// // Thread-local storage intrinsics diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 1bd2b88ae8c5bd..b3bf187205958a 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -13,7 +13,7 @@ //===----------------------------------------------------------------------===// // Interrupt traps let TargetPrefix = 
"x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_int : Intrinsic<[], [llvm_i8_ty], [ImmArg<0>]>; + def int_x86_int : Intrinsic<[], [llvm_i8_ty], [ImmArg>]>; } //===----------------------------------------------------------------------===// @@ -203,12 +203,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse_cmp_ss : GCCBuiltin<"__builtin_ia32_cmpss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, - llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem, ImmArg<2>]>; + llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; // NOTE: This comparison intrinsic is not used by clang as long as the // distinction in signaling behaviour is not implemented. def int_x86_sse_cmp_ps : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, - llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem, ImmArg<2>]>; + llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_sse_comieq_ss : GCCBuiltin<"__builtin_ia32_comieq">, Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; @@ -319,12 +319,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse2_cmp_sd : GCCBuiltin<"__builtin_ia32_cmpsd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, - llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem, ImmArg<2>]>; + llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; // NOTE: This comparison intrinsic is not used by clang as long as the // distinction in signaling behaviour is not implemented. def int_x86_sse2_cmp_pd : Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, - llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem, ImmArg<2>]>; + llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_sse2_comieq_sd : GCCBuiltin<"__builtin_ia32_comisdeq">, Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; @@ -618,7 +618,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
llvm_v16i8_ty], [IntrNoMem]>; def int_x86_sse_pshuf_w : GCCBuiltin<"__builtin_ia32_pshufw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; } // Sign ops @@ -664,16 +664,16 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse41_round_ss : GCCBuiltin<"__builtin_ia32_roundss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<2>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_sse41_round_ps : GCCBuiltin<"__builtin_ia32_roundps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_sse41_round_sd : GCCBuiltin<"__builtin_ia32_roundsd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<2>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_sse41_round_pd : GCCBuiltin<"__builtin_ia32_roundpd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>]>; } // Vector min element @@ -736,20 +736,20 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_aesni_aeskeygenassist : GCCBuiltin<"__builtin_ia32_aeskeygenassist128">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; } // PCLMUL instructions let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_pclmulqdq : GCCBuiltin<"__builtin_ia32_pclmulqdq128">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_x86_pclmulqdq_256 : GCCBuiltin<"__builtin_ia32_pclmulqdq256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_x86_pclmulqdq_512 : GCCBuiltin<"__builtin_ia32_pclmulqdq512">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; } // Vector pack @@ -763,7 +763,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse41_insertps : GCCBuiltin<"__builtin_ia32_insertps128">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; } // Vector blend @@ -783,17 +783,17 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse41_dppd : GCCBuiltin<"__builtin_ia32_dppd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], - [IntrNoMem, Commutative, ImmArg<2>]>; + [IntrNoMem, Commutative, ImmArg>]>; def int_x86_sse41_dpps : GCCBuiltin<"__builtin_ia32_dpps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], - [IntrNoMem, Commutative, ImmArg<2>]>; + [IntrNoMem, Commutative, ImmArg>]>; } // Vector sum of absolute differences let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse41_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw128">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_i8_ty], - [IntrNoMem, Commutative, ImmArg<2>]>; + [IntrNoMem, Commutative, ImmArg>]>; } // Test instruction with bitwise comparison. 
@@ -834,66 +834,66 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse42_pcmpistrm128 : GCCBuiltin<"__builtin_ia32_pcmpistrm128">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_x86_sse42_pcmpistri128 : GCCBuiltin<"__builtin_ia32_pcmpistri128">, Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_x86_sse42_pcmpistria128 : GCCBuiltin<"__builtin_ia32_pcmpistria128">, Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_x86_sse42_pcmpistric128 : GCCBuiltin<"__builtin_ia32_pcmpistric128">, Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_x86_sse42_pcmpistrio128 : GCCBuiltin<"__builtin_ia32_pcmpistrio128">, Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_x86_sse42_pcmpistris128 : GCCBuiltin<"__builtin_ia32_pcmpistris128">, Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_x86_sse42_pcmpistriz128 : GCCBuiltin<"__builtin_ia32_pcmpistriz128">, Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_x86_sse42_pcmpestrm128 : GCCBuiltin<"__builtin_ia32_pcmpestrm128">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<4>]>; + [IntrNoMem, ImmArg>]>; def int_x86_sse42_pcmpestri128 : GCCBuiltin<"__builtin_ia32_pcmpestri128">, Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<4>]>; + [IntrNoMem, ImmArg>]>; def int_x86_sse42_pcmpestria128 : 
GCCBuiltin<"__builtin_ia32_pcmpestria128">, Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<4>]>; + [IntrNoMem, ImmArg>]>; def int_x86_sse42_pcmpestric128 : GCCBuiltin<"__builtin_ia32_pcmpestric128">, Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<4>]>; + [IntrNoMem, ImmArg>]>; def int_x86_sse42_pcmpestrio128 : GCCBuiltin<"__builtin_ia32_pcmpestrio128">, Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<4>]>; + [IntrNoMem, ImmArg>]>; def int_x86_sse42_pcmpestris128 : GCCBuiltin<"__builtin_ia32_pcmpestris128">, Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<4>]>; + [IntrNoMem, ImmArg>]>; def int_x86_sse42_pcmpestriz128 : GCCBuiltin<"__builtin_ia32_pcmpestriz128">, Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<4>]>; + [IntrNoMem, ImmArg>]>; } //===----------------------------------------------------------------------===// @@ -902,14 +902,14 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_sse4a_extrqi : GCCBuiltin<"__builtin_ia32_extrqi">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<1>, ImmArg<2>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_sse4a_extrq : GCCBuiltin<"__builtin_ia32_extrq">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v16i8_ty], [IntrNoMem]>; def int_x86_sse4a_insertqi : GCCBuiltin<"__builtin_ia32_insertqi">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<2>, ImmArg<3>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_sse4a_insertq : GCCBuiltin<"__builtin_ia32_insertq">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; } @@ -946,10 +946,10 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx_round_pd_256 : GCCBuiltin<"__builtin_ia32_roundpd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx_round_ps_256 : GCCBuiltin<"__builtin_ia32_roundps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>]>; } // Horizontal ops @@ -1101,33 +1101,33 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
GCCBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v16qi">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_x86_vgf2p8affineinvqb_256 : GCCBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v32qi">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_x86_vgf2p8affineinvqb_512 : GCCBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v64qi">, Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_x86_vgf2p8affineqb_128 : GCCBuiltin<"__builtin_ia32_vgf2p8affineqb_v16qi">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_x86_vgf2p8affineqb_256 : GCCBuiltin<"__builtin_ia32_vgf2p8affineqb_v32qi">, Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_x86_vgf2p8affineqb_512 : GCCBuiltin<"__builtin_ia32_vgf2p8affineqb_v64qi">, Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_x86_vgf2p8mulb_128 : GCCBuiltin<"__builtin_ia32_vgf2p8mulb_v16qi">, @@ -1161,17 +1161,17 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx_dp_ps_256 : GCCBuiltin<"__builtin_ia32_dpps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], - [IntrNoMem, Commutative, ImmArg<2>]>; + [IntrNoMem, Commutative, ImmArg>]>; } // Vector compare let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_avx_cmp_pd_256 : Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, - llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem, ImmArg<2>]>; + llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx_cmp_ps_256 : Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, - llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem, ImmArg<2>]>; + llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; } // Vector convert @@ -1238,30 +1238,30 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_fpclass_pd_128 : Intrinsic<[llvm_v2i1_ty], [llvm_v2f64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_fpclass_pd_256 : Intrinsic<[llvm_v4i1_ty], [llvm_v4f64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_fpclass_pd_512 : Intrinsic<[llvm_v8i1_ty], [llvm_v8f64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_fpclass_ps_128 : Intrinsic<[llvm_v4i1_ty], [llvm_v4f32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_fpclass_ps_256 : Intrinsic<[llvm_v8i1_ty], [llvm_v8f32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_fpclass_ps_512 : Intrinsic<[llvm_v16i1_ty], [llvm_v16f32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_fpclass_sd : GCCBuiltin<"__builtin_ia32_fpclasssd_mask">, Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_fpclass_ss : GCCBuiltin<"__builtin_ia32_fpclassss_mask">, Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; } // Vector extract sign mask @@ -1707,68 +1707,68 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_avx2_gather_d_pd : GCCBuiltin<"__builtin_ia32_gatherd_pd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx2_gather_d_pd_256 : GCCBuiltin<"__builtin_ia32_gatherd_pd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx2_gather_q_pd : GCCBuiltin<"__builtin_ia32_gatherq_pd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx2_gather_q_pd_256 : GCCBuiltin<"__builtin_ia32_gatherq_pd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx2_gather_d_ps : GCCBuiltin<"__builtin_ia32_gatherd_ps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx2_gather_d_ps_256 : GCCBuiltin<"__builtin_ia32_gatherd_ps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx2_gather_q_ps : GCCBuiltin<"__builtin_ia32_gatherq_ps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx2_gather_q_ps_256 : GCCBuiltin<"__builtin_ia32_gatherq_ps256">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx2_gather_d_q : GCCBuiltin<"__builtin_ia32_gatherd_q">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i64_ty, 
llvm_i8_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx2_gather_d_q_256 : GCCBuiltin<"__builtin_ia32_gatherd_q256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx2_gather_q_q : GCCBuiltin<"__builtin_ia32_gatherq_q">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx2_gather_q_q_256 : GCCBuiltin<"__builtin_ia32_gatherq_q256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx2_gather_d_d : GCCBuiltin<"__builtin_ia32_gatherd_d">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx2_gather_d_d_256 : GCCBuiltin<"__builtin_ia32_gatherd_d256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx2_gather_q_d : GCCBuiltin<"__builtin_ia32_gatherq_d">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx2_gather_q_d_256 : GCCBuiltin<"__builtin_ia32_gatherq_d256">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; } // Misc. @@ -1780,7 +1780,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
llvm_v32i8_ty], [IntrNoMem]>; def int_x86_avx2_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty, - llvm_i8_ty], [IntrNoMem, Commutative, ImmArg<2>]>; + llvm_i8_ty], [IntrNoMem, Commutative, ImmArg>]>; } //===----------------------------------------------------------------------===// @@ -1809,31 +1809,31 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_vfmadd_pd_512 : Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_vfmadd_ps_512 : Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_vfmaddsub_pd_512 : Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_vfmaddsub_ps_512 : Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_vfmadd_f64 : Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty, llvm_double_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_vfmadd_f32 : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_vpmadd52h_uq_128 : GCCBuiltin<"__builtin_ia32_vpmadd52huq128">, @@ -1923,23 +1923,23 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_xop_vpermil2pd : GCCBuiltin<"__builtin_ia32_vpermil2pd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_xop_vpermil2pd_256 : GCCBuiltin<"__builtin_ia32_vpermil2pd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_xop_vpermil2ps : GCCBuiltin<"__builtin_ia32_vpermil2ps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_xop_vpermil2ps_256 : GCCBuiltin<"__builtin_ia32_vpermil2ps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_xop_vfrcz_pd : GCCBuiltin<"__builtin_ia32_vfrczpd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; @@ -2110,19 +2110,19 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_lwpins32 : GCCBuiltin<"__builtin_ia32_lwpins32">, Intrinsic<[llvm_i8_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [ImmArg<2>]>; + [ImmArg>]>; def int_x86_lwpins64 : GCCBuiltin<"__builtin_ia32_lwpins64">, Intrinsic<[llvm_i8_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], - [ImmArg<2>]>; + [ImmArg>]>; def int_x86_lwpval32 : GCCBuiltin<"__builtin_ia32_lwpval32">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [ImmArg<2>]>; + [ImmArg>]>; def int_x86_lwpval64 : GCCBuiltin<"__builtin_ia32_lwpval64">, Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], - [ImmArg<2>]>; + [ImmArg>]>; } //===----------------------------------------------------------------------===// @@ -2423,15 +2423,15 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_mmx_palignr_b : GCCBuiltin<"__builtin_ia32_palignr">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, - llvm_x86mmx_ty, llvm_i8_ty], [IntrNoMem, ImmArg<2>]>; + llvm_x86mmx_ty, llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_mmx_pextr_w : GCCBuiltin<"__builtin_ia32_vec_ext_v4hi">, Intrinsic<[llvm_i32_ty], [llvm_x86mmx_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_mmx_pinsr_w : GCCBuiltin<"__builtin_ia32_vec_set_v4hi">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, - llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<2>]>; + llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; } //===----------------------------------------------------------------------===// @@ -2548,26 +2548,26 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_vcvtps2ph_128 : GCCBuiltin<"__builtin_ia32_vcvtps2ph">, Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256">, Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_vcvtph2ps_512 : Intrinsic<[llvm_v16f32_ty], [llvm_v16i16_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_vcvtps2ph_512 : GCCBuiltin<"__builtin_ia32_vcvtps2ph512_mask">, Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16i16_ty, llvm_i16_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_vcvtps2ph_128 : GCCBuiltin<"__builtin_ia32_vcvtps2ph_mask">, Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_i32_ty, 
llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; } //===----------------------------------------------------------------------===// @@ -2576,10 +2576,10 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_tbm_bextri_u32 : GCCBuiltin<"__builtin_ia32_bextri_u32">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_tbm_bextri_u64 : GCCBuiltin<"__builtin_ia32_bextri_u64">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; } //===----------------------------------------------------------------------===// @@ -2625,7 +2625,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_xend : GCCBuiltin<"__builtin_ia32_xend">, Intrinsic<[], [], []>; def int_x86_xabort : GCCBuiltin<"__builtin_ia32_xabort">, - Intrinsic<[], [llvm_i8_ty], [ImmArg<0>]>; + Intrinsic<[], [llvm_i8_ty], [ImmArg>]>; def int_x86_xtest : GCCBuiltin<"__builtin_ia32_xtest">, Intrinsic<[llvm_i32_ty], [], []>; } @@ -2667,70 +2667,70 @@ let TargetPrefix = "x86" in { let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_avx512_cvttss2si : GCCBuiltin<"__builtin_ia32_vcvttss2si32">, Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_cvttss2si64 : GCCBuiltin<"__builtin_ia32_vcvttss2si64">, Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_cvttss2usi : GCCBuiltin<"__builtin_ia32_vcvttss2usi32">, Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_cvttss2usi64 : GCCBuiltin<"__builtin_ia32_vcvttss2usi64">, Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_cvtusi2ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss32">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, - llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<2>]>; + llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_cvtusi642ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss64">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, - llvm_i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<2>]>; + llvm_i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_cvttsd2si : GCCBuiltin<"__builtin_ia32_vcvttsd2si32">, Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_cvttsd2si64 : GCCBuiltin<"__builtin_ia32_vcvttsd2si64">, Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_cvttsd2usi : GCCBuiltin<"__builtin_ia32_vcvttsd2usi32">, Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_cvttsd2usi64 : GCCBuiltin<"__builtin_ia32_vcvttsd2usi64">, Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_cvtusi642sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd64">, 
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, - llvm_i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<2>]>; + llvm_i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_vcvtss2usi32 : GCCBuiltin<"__builtin_ia32_vcvtss2usi32">, Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_vcvtss2usi64 : GCCBuiltin<"__builtin_ia32_vcvtss2usi64">, Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_vcvtss2si32 : GCCBuiltin<"__builtin_ia32_vcvtss2si32">, Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_vcvtss2si64 : GCCBuiltin<"__builtin_ia32_vcvtss2si64">, Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_vcvtsd2usi32 : GCCBuiltin<"__builtin_ia32_vcvtsd2usi32">, Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_vcvtsd2usi64 : GCCBuiltin<"__builtin_ia32_vcvtsd2usi64">, Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_vcvtsd2si32 : GCCBuiltin<"__builtin_ia32_vcvtsd2si32">, Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_vcvtsd2si64 : GCCBuiltin<"__builtin_ia32_vcvtsd2si64">, Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_cvtsi2ss32 : GCCBuiltin<"__builtin_ia32_cvtsi2ss32">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, - llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<2>]>; + llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_cvtsi2ss64 : GCCBuiltin<"__builtin_ia32_cvtsi2ss64">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, - llvm_i64_ty, llvm_i32_ty], [IntrNoMem, 
ImmArg<2>]>; + llvm_i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_cvtsi2sd64 : GCCBuiltin<"__builtin_ia32_cvtsi2sd64">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, - llvm_i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<2>]>; + llvm_i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; } // Pack ops. @@ -2753,11 +2753,11 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_sitofp_round : Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_uitofp_round : Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvtpd2dq_128 : GCCBuiltin<"__builtin_ia32_cvtpd2dq128_mask">, @@ -2769,25 +2769,25 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". GCCBuiltin<"__builtin_ia32_cvtpd2dq512_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvtpd2ps_512 : GCCBuiltin<"__builtin_ia32_cvtpd2ps512_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f64_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvtsd2ss_round : GCCBuiltin<"__builtin_ia32_cvtsd2ss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v2f64_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<4>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvtss2sd_round : GCCBuiltin<"__builtin_ia32_cvtss2sd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v4f32_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<4>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvtpd2ps : GCCBuiltin<"__builtin_ia32_cvtpd2ps_mask">, @@ -2811,7 +2811,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with 
"llvm.x86.". GCCBuiltin<"__builtin_ia32_cvtpd2qq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvtpd2udq_128 : GCCBuiltin<"__builtin_ia32_cvtpd2udq128_mask">, @@ -2829,7 +2829,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". GCCBuiltin<"__builtin_ia32_cvtpd2udq512_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvtpd2uqq_128 : GCCBuiltin<"__builtin_ia32_cvtpd2uqq128_mask">, @@ -2847,7 +2847,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". GCCBuiltin<"__builtin_ia32_cvtpd2uqq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvtps2dq_128 : GCCBuiltin<"__builtin_ia32_cvtps2dq128_mask">, @@ -2865,13 +2865,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". GCCBuiltin<"__builtin_ia32_cvtps2dq512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvtps2pd_512 : GCCBuiltin<"__builtin_ia32_cvtps2pd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f32_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvtps2qq_128 : GCCBuiltin<"__builtin_ia32_cvtps2qq128_mask">, @@ -2889,7 +2889,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
GCCBuiltin<"__builtin_ia32_cvtps2qq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvtps2udq_128 : GCCBuiltin<"__builtin_ia32_cvtps2udq128_mask">, @@ -2907,7 +2907,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". GCCBuiltin<"__builtin_ia32_cvtps2udq512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvtps2uqq_128 : GCCBuiltin<"__builtin_ia32_cvtps2uqq128_mask">, @@ -2925,7 +2925,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". GCCBuiltin<"__builtin_ia32_cvtps2uqq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvtqq2ps_128 : GCCBuiltin<"__builtin_ia32_cvtqq2ps128_mask">, @@ -2943,7 +2943,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". GCCBuiltin<"__builtin_ia32_cvttpd2dq512_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvttpd2qq_128 : GCCBuiltin<"__builtin_ia32_cvttpd2qq128_mask">, @@ -2961,7 +2961,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". GCCBuiltin<"__builtin_ia32_cvttpd2qq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvttpd2udq_128 : GCCBuiltin<"__builtin_ia32_cvttpd2udq128_mask">, @@ -2979,7 +2979,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
GCCBuiltin<"__builtin_ia32_cvttpd2udq512_mask">, Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvttpd2uqq_128 : GCCBuiltin<"__builtin_ia32_cvttpd2uqq128_mask">, @@ -2997,13 +2997,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". GCCBuiltin<"__builtin_ia32_cvttpd2uqq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvttps2dq_512 : GCCBuiltin<"__builtin_ia32_cvttps2dq512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvttps2qq_128 : GCCBuiltin<"__builtin_ia32_cvttps2qq128_mask">, @@ -3021,7 +3021,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". GCCBuiltin<"__builtin_ia32_cvttps2qq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvttps2udq_128 : GCCBuiltin<"__builtin_ia32_cvttps2udq128_mask">, @@ -3039,7 +3039,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". GCCBuiltin<"__builtin_ia32_cvttps2udq512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvttps2uqq_128 : GCCBuiltin<"__builtin_ia32_cvttps2uqq128_mask">, @@ -3057,7 +3057,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
GCCBuiltin<"__builtin_ia32_cvttps2uqq512_mask">, Intrinsic<[llvm_v8i64_ty], [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cvtuqq2ps_128 : GCCBuiltin<"__builtin_ia32_cvtuqq2ps128_mask">, @@ -3068,75 +3068,75 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_mask_rndscale_pd_128 : GCCBuiltin<"__builtin_ia32_rndscalepd_128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_rndscale_pd_256 : GCCBuiltin<"__builtin_ia32_rndscalepd_256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_rndscale_pd_512 : GCCBuiltin<"__builtin_ia32_rndscalepd_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>, ImmArg<4>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_mask_rndscale_ps_128 : GCCBuiltin<"__builtin_ia32_rndscaleps_128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_rndscale_ps_256 : GCCBuiltin<"__builtin_ia32_rndscaleps_256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_rndscale_ps_512 : GCCBuiltin<"__builtin_ia32_rndscaleps_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>, ImmArg<4>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_mask_reduce_pd_128 : GCCBuiltin<"__builtin_ia32_reducepd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<1>]>; + 
[IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_reduce_pd_256 : GCCBuiltin<"__builtin_ia32_reducepd256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_reduce_pd_512 : GCCBuiltin<"__builtin_ia32_reducepd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>, ImmArg<4>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_mask_reduce_ps_128 : GCCBuiltin<"__builtin_ia32_reduceps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_reduce_ps_256 : GCCBuiltin<"__builtin_ia32_reduceps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_reduce_ps_512 : GCCBuiltin<"__builtin_ia32_reduceps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>, ImmArg<4>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_mask_range_pd_128 : GCCBuiltin<"__builtin_ia32_rangepd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_range_pd_256 : GCCBuiltin<"__builtin_ia32_rangepd256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_range_pd_512 : GCCBuiltin<"__builtin_ia32_rangepd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>, ImmArg<5>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_mask_range_ps_128 : 
GCCBuiltin<"__builtin_ia32_rangeps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_range_ps_256 : GCCBuiltin<"__builtin_ia32_rangeps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_range_ps_512 : GCCBuiltin<"__builtin_ia32_rangeps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>, ImmArg<5>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; } // Vector load with broadcast @@ -3166,111 +3166,111 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_add_ps_512 : GCCBuiltin<"__builtin_ia32_addps512">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<2>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_add_pd_512 : GCCBuiltin<"__builtin_ia32_addpd512">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<2>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_sub_ps_512 : GCCBuiltin<"__builtin_ia32_subps512">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<2>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_sub_pd_512 : GCCBuiltin<"__builtin_ia32_subpd512">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<2>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mul_ps_512 : GCCBuiltin<"__builtin_ia32_mulps512">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<2>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mul_pd_512 : GCCBuiltin<"__builtin_ia32_mulpd512">, Intrinsic<[llvm_v8f64_ty], 
[llvm_v8f64_ty, llvm_v8f64_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<2>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_div_ps_512 : GCCBuiltin<"__builtin_ia32_divps512">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<2>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_div_pd_512 : GCCBuiltin<"__builtin_ia32_divpd512">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<2>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_max_ps_512 : GCCBuiltin<"__builtin_ia32_maxps512">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<2>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_max_pd_512 : GCCBuiltin<"__builtin_ia32_maxpd512">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<2>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_min_ps_512 : GCCBuiltin<"__builtin_ia32_minps512">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<2>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_min_pd_512 : GCCBuiltin<"__builtin_ia32_minpd512">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<2>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_add_ss_round : GCCBuiltin<"__builtin_ia32_addss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, - llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>; + llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_div_ss_round : GCCBuiltin<"__builtin_ia32_divss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, - llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>; + llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_mul_ss_round : 
GCCBuiltin<"__builtin_ia32_mulss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, - llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>; + llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_sub_ss_round : GCCBuiltin<"__builtin_ia32_subss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, - llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>; + llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_max_ss_round : GCCBuiltin<"__builtin_ia32_maxss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, - llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>; + llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_min_ss_round : GCCBuiltin<"__builtin_ia32_minss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, - llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>; + llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_add_sd_round : GCCBuiltin<"__builtin_ia32_addsd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, - llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>; + llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_div_sd_round : GCCBuiltin<"__builtin_ia32_divsd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, - llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>; + llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_mul_sd_round : GCCBuiltin<"__builtin_ia32_mulsd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, - llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>; + llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_sub_sd_round : GCCBuiltin<"__builtin_ia32_subsd_round_mask">, 
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, - llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>; + llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_max_sd_round : GCCBuiltin<"__builtin_ia32_maxsd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, - llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>; + llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_min_sd_round : GCCBuiltin<"__builtin_ia32_minsd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, - llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>; + llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_rndscale_ss : GCCBuiltin<"__builtin_ia32_rndscaless_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<4>, ImmArg<5>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_mask_rndscale_sd : GCCBuiltin<"__builtin_ia32_rndscalesd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<4>, ImmArg<5>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_mask_range_ss : GCCBuiltin<"__builtin_ia32_rangess128_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<4>, ImmArg<5>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_mask_range_sd : GCCBuiltin<"__builtin_ia32_rangesd128_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<4>, ImmArg<5>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_mask_reduce_ss : GCCBuiltin<"__builtin_ia32_reducess_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, 
llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<4>, ImmArg<5>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_mask_reduce_sd : GCCBuiltin<"__builtin_ia32_reducesd_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<4>, ImmArg<5>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_mask_scalef_sd : GCCBuiltin<"__builtin_ia32_scalefsd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<4>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_scalef_ss : GCCBuiltin<"__builtin_ia32_scalefss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<4>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_scalef_pd_128 : GCCBuiltin<"__builtin_ia32_scalefpd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; @@ -3280,7 +3280,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_mask_scalef_pd_512 : GCCBuiltin<"__builtin_ia32_scalefpd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<4>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_scalef_ps_128 : GCCBuiltin<"__builtin_ia32_scalefps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; @@ -3290,103 +3290,103 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_avx512_mask_scalef_ps_512 : GCCBuiltin<"__builtin_ia32_scalefps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<4>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_sqrt_ss : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<4>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_sqrt_sd : Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<4>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_sqrt_pd_512 : Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_sqrt_ps_512 : Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_fixupimm_pd_128 : GCCBuiltin<"__builtin_ia32_fixupimmpd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_maskz_fixupimm_pd_128 : GCCBuiltin<"__builtin_ia32_fixupimmpd128_maskz">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_fixupimm_pd_256 : GCCBuiltin<"__builtin_ia32_fixupimmpd256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_maskz_fixupimm_pd_256 : GCCBuiltin<"__builtin_ia32_fixupimmpd256_maskz">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_fixupimm_pd_512 : GCCBuiltin<"__builtin_ia32_fixupimmpd512_mask">, Intrinsic<[llvm_v8f64_ty], 
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_i8_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<3>, ImmArg<5>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_maskz_fixupimm_pd_512 : GCCBuiltin<"__builtin_ia32_fixupimmpd512_maskz">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_i8_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<3>, ImmArg<5>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_mask_fixupimm_ps_128 : GCCBuiltin<"__builtin_ia32_fixupimmps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_maskz_fixupimm_ps_128 : GCCBuiltin<"__builtin_ia32_fixupimmps128_maskz">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_fixupimm_ps_256 : GCCBuiltin<"__builtin_ia32_fixupimmps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_maskz_fixupimm_ps_256 : GCCBuiltin<"__builtin_ia32_fixupimmps256_maskz">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_fixupimm_ps_512 : GCCBuiltin<"__builtin_ia32_fixupimmps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<3>, ImmArg<5>]>; + llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_maskz_fixupimm_ps_512 : GCCBuiltin<"__builtin_ia32_fixupimmps512_maskz">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16i32_ty, llvm_i32_ty, - llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<3>, ImmArg<5>]>; + 
llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_mask_fixupimm_sd : GCCBuiltin<"__builtin_ia32_fixupimmsd_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<3>, ImmArg<5>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_maskz_fixupimm_sd : GCCBuiltin<"__builtin_ia32_fixupimmsd_maskz">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<3>, ImmArg<5>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_mask_fixupimm_ss : GCCBuiltin<"__builtin_ia32_fixupimmss_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<3>, ImmArg<5>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_maskz_fixupimm_ss : GCCBuiltin<"__builtin_ia32_fixupimmss_maskz">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<3>, ImmArg<5>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_mask_getexp_pd_128 : GCCBuiltin<"__builtin_ia32_getexppd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; @@ -3396,7 +3396,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_mask_getexp_pd_512 : GCCBuiltin<"__builtin_ia32_getexppd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_getexp_ps_128 : GCCBuiltin<"__builtin_ia32_getexpps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; @@ -3406,64 +3406,64 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_avx512_mask_getexp_ps_512 : GCCBuiltin<"__builtin_ia32_getexpps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_getexp_ss : GCCBuiltin<"__builtin_ia32_getexpss128_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<4>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_getexp_sd : GCCBuiltin<"__builtin_ia32_getexpsd128_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<4>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_getmant_pd_128 : GCCBuiltin<"__builtin_ia32_getmantpd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_getmant_pd_256 : GCCBuiltin<"__builtin_ia32_getmantpd256_mask">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_getmant_pd_512 : GCCBuiltin<"__builtin_ia32_getmantpd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty,llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty,llvm_i32_ty ], - [IntrNoMem, ImmArg<1>, ImmArg<4>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_mask_getmant_ps_128 : GCCBuiltin<"__builtin_ia32_getmantps128_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_getmant_ps_256 : GCCBuiltin<"__builtin_ia32_getmantps256_mask">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_getmant_ps_512 : GCCBuiltin<"__builtin_ia32_getmantps512_mask">, Intrinsic<[llvm_v16f32_ty], 
[llvm_v16f32_ty,llvm_i32_ty, llvm_v16f32_ty,llvm_i16_ty,llvm_i32_ty], - [IntrNoMem, ImmArg<1>, ImmArg<4>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_mask_getmant_ss : GCCBuiltin<"__builtin_ia32_getmantss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, - llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<2>, ImmArg<5>]>; + llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_mask_getmant_sd : GCCBuiltin<"__builtin_ia32_getmantsd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, - llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<2>, ImmArg<5>]>; + llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, @@ -3518,41 +3518,41 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_rcp28_ps : GCCBuiltin<"__builtin_ia32_rcp28ps_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, - llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<3>]>; + llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_rcp28_pd : GCCBuiltin<"__builtin_ia32_rcp28pd_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, - llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<3>]>; + llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_exp2_ps : GCCBuiltin<"__builtin_ia32_exp2ps_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, - llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<3>]>; + llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_exp2_pd : GCCBuiltin<"__builtin_ia32_exp2pd_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, - llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<3>]>; + llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_rcp28_ss : 
GCCBuiltin<"__builtin_ia32_rcp28ss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<4>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_rcp28_sd : GCCBuiltin<"__builtin_ia32_rcp28sd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<4>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_rsqrt28_ps : GCCBuiltin<"__builtin_ia32_rsqrt28ps_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_rsqrt28_pd : GCCBuiltin<"__builtin_ia32_rsqrt28pd_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss_round_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<4>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd_round_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<4>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_psad_bw_512 : GCCBuiltin<"__builtin_ia32_psadbw512">, Intrinsic<[llvm_v8i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty], [IntrNoMem, Commutative]>; @@ -3582,19 +3582,19 @@ let TargetPrefix = "x86" in { GCCBuiltin<"__builtin_ia32_dbpsadbw128">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_dbpsadbw_256 : GCCBuiltin<"__builtin_ia32_dbpsadbw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_dbpsadbw_512 : GCCBuiltin<"__builtin_ia32_dbpsadbw512">, 
Intrinsic<[llvm_v32i16_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; } // Gather and Scatter ops @@ -3605,117 +3605,117 @@ let TargetPrefix = "x86" in { def int_x86_avx512_gather_dpd_512 : Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_gather_dps_512 : Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_gather_qpd_512 : Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_gather_qps_512 : Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_gather_dpq_512 : Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_gather_dpi_512 : Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_gather_qpq_512 : Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_gather_qpi_512 : Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_gather3div2_df : Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_gather3div2_di : Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, 
llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_gather3div4_df : Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_gather3div4_di : Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_gather3div4_sf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_gather3div4_si : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_gather3div8_sf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_gather3div8_si : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_gather3siv2_df : Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_gather3siv2_di : Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_gather3siv4_df : Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_gather3siv4_di : Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; 
def int_x86_avx512_gather3siv4_sf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_gather3siv4_si : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_gather3siv8_sf : Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_gather3siv8_si : Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; // scatter // NOTE: These are deprecated in favor of the versions that take a vXi1 mask. @@ -3724,149 +3724,149 @@ let TargetPrefix = "x86" in { def int_x86_avx512_scatter_dpd_512 : Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v8i32_ty, llvm_v8f64_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_scatter_dps_512 : Intrinsic<[], [llvm_ptr_ty, llvm_i16_ty, llvm_v16i32_ty, llvm_v16f32_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_scatter_qpd_512 : Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_v8f64_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_scatter_qps_512 : Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_v8f32_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_scatter_dpq_512 : Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v8i32_ty, llvm_v8i64_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_scatter_dpi_512 : Intrinsic<[], [llvm_ptr_ty, llvm_i16_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_scatter_qpq_512 : Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_scatter_qpi_512 : 
Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_v8i32_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_scatterdiv2_df : Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_scatterdiv2_di : Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_scatterdiv4_df : Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_scatterdiv4_di : Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_scatterdiv4_sf : Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_scatterdiv4_si : Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_scatterdiv8_sf : Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_scatterdiv8_si : Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_scattersiv2_df : Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_scattersiv2_di : Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_scattersiv4_df : Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_scattersiv4_di : Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_scattersiv4_sf : Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, 
llvm_v4i32_ty, llvm_v4f32_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_scattersiv4_si : Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_scattersiv8_sf : Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_scattersiv8_si : Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; // gather prefetch // NOTE: These can't be ArgMemOnly because you can put the address completely // in the index register. def int_x86_avx512_gatherpf_dpd_512 : GCCBuiltin<"__builtin_ia32_gatherpfdpd">, Intrinsic<[], [llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg<3>, ImmArg<4>]>; + llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>]>; def int_x86_avx512_gatherpf_dps_512 : GCCBuiltin<"__builtin_ia32_gatherpfdps">, Intrinsic<[], [llvm_i16_ty, llvm_v16i32_ty, llvm_ptr_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg<3>, ImmArg<4>]>; + llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>]>; def int_x86_avx512_gatherpf_qpd_512 : GCCBuiltin<"__builtin_ia32_gatherpfqpd">, Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg<3>, ImmArg<4>]>; + llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>]>; def int_x86_avx512_gatherpf_qps_512 : GCCBuiltin<"__builtin_ia32_gatherpfqps">, Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg<3>, ImmArg<4>]>; + llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>]>; // scatter prefetch // NOTE: These can't be ArgMemOnly because you can put the address completely // in the index register. 
def int_x86_avx512_scatterpf_dpd_512 : GCCBuiltin<"__builtin_ia32_scatterpfdpd">, Intrinsic<[], [llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg<3>, ImmArg<4>]>; + llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>]>; def int_x86_avx512_scatterpf_dps_512 : GCCBuiltin<"__builtin_ia32_scatterpfdps">, Intrinsic<[], [llvm_i16_ty, llvm_v16i32_ty, llvm_ptr_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg<3>, ImmArg<4>]>; + llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>]>; def int_x86_avx512_scatterpf_qpd_512 : GCCBuiltin<"__builtin_ia32_scatterpfqpd">, Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg<3>, ImmArg<4>]>; + llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>]>; def int_x86_avx512_scatterpf_qps_512 : GCCBuiltin<"__builtin_ia32_scatterpfqps">, Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg<3>, ImmArg<4>]>; + llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>]>; } // AVX512 gather/scatter intrinsics that use vXi1 masks. 
@@ -3876,134 +3876,134 @@ let TargetPrefix = "x86" in { def int_x86_avx512_mask_gather_dpd_512 : Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i1_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_mask_gather_dps_512 : Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_ptr_ty, llvm_v16i32_ty, llvm_v16i1_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_mask_gather_qpd_512 : Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_ptr_ty, llvm_v8i64_ty, llvm_v8i1_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_mask_gather_qps_512 : Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i64_ty, llvm_v8i1_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_mask_gather_dpq_512 : Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i1_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_mask_gather_dpi_512 : Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_ptr_ty, llvm_v16i32_ty, llvm_v16i1_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_mask_gather_qpq_512 : Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_ptr_ty, llvm_v8i64_ty, llvm_v8i1_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_mask_gather_qpi_512 : Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i64_ty, llvm_v8i1_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_mask_gather3div2_df : Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i1_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_mask_gather3div2_di : Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i1_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + 
[IntrReadMem, ImmArg>]>; def int_x86_avx512_mask_gather3div4_df : Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i1_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_mask_gather3div4_di : Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i1_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_mask_gather3div4_sf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i1_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_mask_gather3div4_si : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i1_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_mask_gather3div8_sf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i1_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_mask_gather3div8_si : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i1_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_mask_gather3siv2_df : Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i1_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_mask_gather3siv2_di : Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i1_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_mask_gather3siv4_df : Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i1_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_mask_gather3siv4_di : Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i1_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def 
int_x86_avx512_mask_gather3siv4_sf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i1_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_mask_gather3siv4_si : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i1_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_mask_gather3siv8_sf : Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i1_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_mask_gather3siv8_si : Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i1_ty, llvm_i32_ty], - [IntrReadMem, ImmArg<4>]>; + [IntrReadMem, ImmArg>]>; def int_x86_avx512_mask_scatter_dpd_512 : Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty, llvm_v8i32_ty, llvm_v8f64_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_mask_scatter_dps_512 : Intrinsic<[], [llvm_ptr_ty, llvm_v16i1_ty, llvm_v16i32_ty, llvm_v16f32_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_mask_scatter_qpd_512 : Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty, llvm_v8i64_ty, llvm_v8f64_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_mask_scatter_qps_512 : Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty, llvm_v8i64_ty, llvm_v8f32_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; // NOTE: These can't be ArgMemOnly because you can put the address completely @@ -4011,99 +4011,99 @@ let TargetPrefix = "x86" in { def int_x86_avx512_mask_scatter_dpq_512 : Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty, llvm_v8i32_ty, llvm_v8i64_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_mask_scatter_dpi_512 : Intrinsic<[], [llvm_ptr_ty, llvm_v16i1_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_mask_scatter_qpq_512 : Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty,llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty], - 
[ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_mask_scatter_qpi_512 : Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty, llvm_v8i64_ty, llvm_v8i32_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_mask_scatterdiv2_df : Intrinsic<[], [llvm_ptr_ty, llvm_v2i1_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_mask_scatterdiv2_di : Intrinsic<[], [llvm_ptr_ty, llvm_v2i1_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_mask_scatterdiv4_df : Intrinsic<[], [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_mask_scatterdiv4_di : Intrinsic<[], [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_mask_scatterdiv4_sf : Intrinsic<[], [llvm_ptr_ty, llvm_v2i1_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_mask_scatterdiv4_si : Intrinsic<[], [llvm_ptr_ty, llvm_v2i1_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_mask_scatterdiv8_sf : Intrinsic<[], [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_mask_scatterdiv8_si : Intrinsic<[], [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_mask_scattersiv2_df : Intrinsic<[], [llvm_ptr_ty, llvm_v2i1_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_mask_scattersiv2_di : Intrinsic<[], [llvm_ptr_ty, llvm_v2i1_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_mask_scattersiv4_df : Intrinsic<[], [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_mask_scattersiv4_di : Intrinsic<[], [llvm_ptr_ty, llvm_v4i1_ty, 
llvm_v4i32_ty, llvm_v4i64_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_mask_scattersiv4_sf : Intrinsic<[], [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_mask_scattersiv4_si : Intrinsic<[], [llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_mask_scattersiv8_sf : Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; def int_x86_avx512_mask_scattersiv8_si : Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], - [ImmArg<4>]>; + [ImmArg>]>; } // AVX-512 conflict detection instruction @@ -4136,11 +4136,11 @@ let TargetPrefix = "x86" in { def int_x86_avx512_vcomi_sd : GCCBuiltin<"__builtin_ia32_vcomisd">, Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>, ImmArg<3>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_vcomi_ss : GCCBuiltin<"__builtin_ia32_vcomiss">, Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>, ImmArg<3>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; } // Compress, Expand @@ -4684,37 +4684,37 @@ let TargetPrefix = "x86" in { GCCBuiltin<"__builtin_ia32_pternlogd128">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_pternlog_d_256 : GCCBuiltin<"__builtin_ia32_pternlogd256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_pternlog_d_512 : GCCBuiltin<"__builtin_ia32_pternlogd512">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<3>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_pternlog_q_128 : 
GCCBuiltin<"__builtin_ia32_pternlogq128">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_pternlog_q_256 : GCCBuiltin<"__builtin_ia32_pternlogq256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; def int_x86_avx512_pternlog_q_512 : GCCBuiltin<"__builtin_ia32_pternlogq512">, Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<3>]>; + [IntrNoMem, ImmArg>]>; } // vp2intersect @@ -4752,34 +4752,34 @@ let TargetPrefix = "x86" in { def int_x86_avx512_cmp_ps_512 : Intrinsic<[llvm_v16i1_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>, ImmArg<3>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_cmp_pd_512 : Intrinsic<[llvm_v8i1_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>, ImmArg<3>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; def int_x86_avx512_cmp_ps_256 : Intrinsic<[llvm_v8i1_ty], [llvm_v8f32_ty, llvm_v8f32_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<2>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_cmp_pd_256 : Intrinsic<[llvm_v4i1_ty], [llvm_v4f64_ty, llvm_v4f64_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<2>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_cmp_ps_128 : Intrinsic<[llvm_v4i1_ty], [llvm_v4f32_ty, llvm_v4f32_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<2>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_cmp_pd_128 : Intrinsic<[llvm_v2i1_ty], [llvm_v2f64_ty, llvm_v2f64_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<2>]>; + llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cmp_ss : GCCBuiltin<"__builtin_ia32_cmpss_mask">, Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>, ImmArg<4>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; def 
int_x86_avx512_mask_cmp_sd : GCCBuiltin<"__builtin_ia32_cmpsd_mask">, Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<2>, ImmArg<4>]>; + [IntrNoMem, ImmArg>, ImmArg>]>; } //===----------------------------------------------------------------------===// @@ -4787,7 +4787,7 @@ let TargetPrefix = "x86" in { let TargetPrefix = "x86" in { def int_x86_sha1rnds4 : GCCBuiltin<"__builtin_ia32_sha1rnds4">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg<2>]>; + [IntrNoMem, ImmArg>]>; def int_x86_sha1nexte : GCCBuiltin<"__builtin_ia32_sha1nexte">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_x86_sha1msg1 : GCCBuiltin<"__builtin_ia32_sha1msg1">, diff --git a/llvm/include/llvm/IR/IntrinsicsXCore.td b/llvm/include/llvm/IR/IntrinsicsXCore.td index 7fe8bdfd3bd011..89dbc65fea4459 100644 --- a/llvm/include/llvm/IR/IntrinsicsXCore.td +++ b/llvm/include/llvm/IR/IntrinsicsXCore.td @@ -38,58 +38,58 @@ let TargetPrefix = "xcore" in { // All intrinsics start with "llvm.xcore.". // Resource instructions. 
def int_xcore_getr : Intrinsic<[llvm_anyptr_ty],[llvm_i32_ty]>; def int_xcore_freer : Intrinsic<[],[llvm_anyptr_ty], - [NoCapture<0>]>; - def int_xcore_in : Intrinsic<[llvm_i32_ty],[llvm_anyptr_ty],[NoCapture<0>]>; + [NoCapture>]>; + def int_xcore_in : Intrinsic<[llvm_i32_ty],[llvm_anyptr_ty],[NoCapture>]>; def int_xcore_int : Intrinsic<[llvm_i32_ty],[llvm_anyptr_ty], - [NoCapture<0>]>; + [NoCapture>]>; def int_xcore_inct : Intrinsic<[llvm_i32_ty],[llvm_anyptr_ty], - [NoCapture<0>]>; + [NoCapture>]>; def int_xcore_out : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty], - [NoCapture<0>]>; + [NoCapture>]>; def int_xcore_outt : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty], - [NoCapture<0>]>; + [NoCapture>]>; def int_xcore_outct : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty], - [NoCapture<0>]>; + [NoCapture>]>; def int_xcore_chkct : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty], - [NoCapture<0>]>; + [NoCapture>]>; def int_xcore_testct : Intrinsic<[llvm_i32_ty],[llvm_anyptr_ty], - [NoCapture<0>]>; + [NoCapture>]>; def int_xcore_testwct : Intrinsic<[llvm_i32_ty],[llvm_anyptr_ty], - [NoCapture<0>]>; + [NoCapture>]>; def int_xcore_setd : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty], - [NoCapture<0>]>; + [NoCapture>]>; def int_xcore_setc : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty], - [NoCapture<0>]>; + [NoCapture>]>; def int_xcore_inshr : Intrinsic<[llvm_i32_ty],[llvm_anyptr_ty, llvm_i32_ty], - [NoCapture<0>]>; + [NoCapture>]>; def int_xcore_outshr : Intrinsic<[llvm_i32_ty],[llvm_anyptr_ty, llvm_i32_ty], - [NoCapture<0>]>; + [NoCapture>]>; def int_xcore_setpt : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty], - [NoCapture<0>]>; + [NoCapture>]>; def int_xcore_clrpt : Intrinsic<[],[llvm_anyptr_ty], - [NoCapture<0>]>; + [NoCapture>]>; def int_xcore_getts : Intrinsic<[llvm_i32_ty],[llvm_anyptr_ty], - [NoCapture<0>]>; + [NoCapture>]>; def int_xcore_syncr : Intrinsic<[],[llvm_anyptr_ty], - [NoCapture<0>]>; + [NoCapture>]>; def int_xcore_settw : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty], - 
[NoCapture<0>]>; + [NoCapture>]>; def int_xcore_setv : Intrinsic<[],[llvm_anyptr_ty, llvm_ptr_ty], - [NoCapture<0>]>; + [NoCapture>]>; def int_xcore_setev : Intrinsic<[],[llvm_anyptr_ty, llvm_ptr_ty], - [NoCapture<0>]>; - def int_xcore_eeu : Intrinsic<[],[llvm_anyptr_ty], [NoCapture<0>]>; - def int_xcore_edu : Intrinsic<[],[llvm_anyptr_ty], [NoCapture<0>]>; + [NoCapture>]>; + def int_xcore_eeu : Intrinsic<[],[llvm_anyptr_ty], [NoCapture>]>; + def int_xcore_edu : Intrinsic<[],[llvm_anyptr_ty], [NoCapture>]>; def int_xcore_setclk : Intrinsic<[],[llvm_anyptr_ty, llvm_anyptr_ty], - [NoCapture<0>, NoCapture<1>]>; + [NoCapture>, NoCapture>]>; def int_xcore_setrdy : Intrinsic<[],[llvm_anyptr_ty, llvm_anyptr_ty], - [NoCapture<0>, NoCapture<1>]>; + [NoCapture>, NoCapture>]>; def int_xcore_setpsc : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty], - [NoCapture<0>]>; + [NoCapture>]>; def int_xcore_peek : Intrinsic<[llvm_i32_ty],[llvm_anyptr_ty], - [NoCapture<0>]>; + [NoCapture>]>; def int_xcore_endin : Intrinsic<[llvm_i32_ty],[llvm_anyptr_ty], - [NoCapture<0>]>; + [NoCapture>]>; // Intrinsics for events. def int_xcore_waitevent : Intrinsic<[llvm_ptr_ty],[], [IntrReadMem]>; @@ -103,18 +103,18 @@ let TargetPrefix = "xcore" in { // All intrinsics start with "llvm.xcore.". // Intrinsics for threads. 
def int_xcore_getst : Intrinsic <[llvm_anyptr_ty],[llvm_anyptr_ty], - [NoCapture<0>]>; - def int_xcore_msync : Intrinsic <[],[llvm_anyptr_ty], [NoCapture<0>]>; + [NoCapture>]>; + def int_xcore_msync : Intrinsic <[],[llvm_anyptr_ty], [NoCapture>]>; def int_xcore_ssync : Intrinsic <[],[]>; - def int_xcore_mjoin : Intrinsic <[],[llvm_anyptr_ty], [NoCapture<0>]>; + def int_xcore_mjoin : Intrinsic <[],[llvm_anyptr_ty], [NoCapture>]>; def int_xcore_initsp : Intrinsic <[],[llvm_anyptr_ty, llvm_ptr_ty], - [NoCapture<0>]>; + [NoCapture>]>; def int_xcore_initpc : Intrinsic <[],[llvm_anyptr_ty, llvm_ptr_ty], - [NoCapture<0>]>; + [NoCapture>]>; def int_xcore_initlr : Intrinsic <[],[llvm_anyptr_ty, llvm_ptr_ty], - [NoCapture<0>]>; + [NoCapture>]>; def int_xcore_initcp : Intrinsic <[],[llvm_anyptr_ty, llvm_ptr_ty], - [NoCapture<0>]>; + [NoCapture>]>; def int_xcore_initdp : Intrinsic <[],[llvm_anyptr_ty, llvm_ptr_ty], - [NoCapture<0>]>; + [NoCapture>]>; } diff --git a/llvm/include/llvm/IR/Statepoint.h b/llvm/include/llvm/IR/Statepoint.h index 89f130bc335179..34eb1126b373fb 100644 --- a/llvm/include/llvm/IR/Statepoint.h +++ b/llvm/include/llvm/IR/Statepoint.h @@ -65,34 +65,82 @@ bool isGCRelocate(const Value *V); bool isGCResult(const CallBase *Call); bool isGCResult(const Value *V); +/// Represents a gc.statepoint intrinsic call. This extends directly from +/// CallBase as the IntrinsicInst only supports calls and gc.statepoint is +/// invokable. 
+class GCStatepointInst : public CallBase { +public: + GCStatepointInst() = delete; + GCStatepointInst(const GCStatepointInst &) = delete; + GCStatepointInst &operator=(const GCStatepointInst &) = delete; + + static bool classof(const CallBase *I) { + if (const Function *CF = I->getCalledFunction()) + return CF->getIntrinsicID() == Intrinsic::experimental_gc_statepoint; + return false; + } + + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + + enum { + IDPos = 0, + NumPatchBytesPos = 1, + CalledFunctionPos = 2, + NumCallArgsPos = 3, + FlagsPos = 4, + CallArgsBeginPos = 5, + }; + + /// Return the ID associated with this statepoint. + uint64_t getID() const { + return cast(getArgOperand(IDPos))->getZExtValue(); + } + + /// Return the number of patchable bytes associated with this statepoint. + uint32_t getNumPatchBytes() const { + const Value *NumPatchBytesVal = getArgOperand(NumPatchBytesPos); + uint64_t NumPatchBytes = + cast(NumPatchBytesVal)->getZExtValue(); + assert(isInt<32>(NumPatchBytes) && "should fit in 32 bits!"); + return NumPatchBytes; + } + + /// Number of arguments to be passed to the actual callee. + int getNumCallArgs() const { + return cast(getArgOperand(NumCallArgsPos))->getZExtValue(); + } + + uint64_t getFlags() const { + return cast(getArgOperand(FlagsPos))->getZExtValue(); + } +}; + /// A wrapper around a GC intrinsic call, this provides most of the actual /// functionality for Statepoint and ImmutableStatepoint. It is /// templatized to allow easily specializing of const and non-const /// concrete subtypes. template + typename CallTy> class StatepointBase { - CallBaseTy *StatepointCall; + CallTy *StatepointCall; protected: explicit StatepointBase(InstructionTy *I) { - StatepointCall = isStatepoint(I) ? cast(I) : nullptr; + StatepointCall = isStatepoint(I) ? cast(I) : nullptr; } - explicit StatepointBase(CallBaseTy *Call) { + explicit StatepointBase(CallTy *Call) { StatepointCall = isStatepoint(Call) ? 
Call : nullptr; } public: - using arg_iterator = typename CallBaseTy::const_op_iterator; + using arg_iterator = typename CallTy::const_op_iterator; enum { - IDPos = 0, - NumPatchBytesPos = 1, - CalledFunctionPos = 2, - NumCallArgsPos = 3, - FlagsPos = 4, - CallArgsBeginPos = 5, + CalledFunctionPos = GCStatepointInst::CalledFunctionPos, + CallArgsBeginPos = GCStatepointInst::CallArgsBeginPos, }; void *operator new(size_t, unsigned) = delete; @@ -104,30 +152,17 @@ class StatepointBase { } /// Return the underlying call instruction. - CallBaseTy *getCall() const { + CallTy *getCall() const { assert(*this && "check validity first!"); return StatepointCall; } - uint64_t getFlags() const { - return cast(getCall()->getArgOperand(FlagsPos)) - ->getZExtValue(); - } + // Deprecated shims (update all callers to remove) + uint64_t getFlags() const { return getCall()->getFlags(); } + uint64_t getID() const { return getCall()->getID(); } + uint32_t getNumPatchBytes() const { return getCall()->getNumPatchBytes(); } + int getNumCallArgs() const { return getCall()->getNumCallArgs(); } - /// Return the ID associated with this statepoint. - uint64_t getID() const { - const Value *IDVal = getCall()->getArgOperand(IDPos); - return cast(IDVal)->getZExtValue(); - } - - /// Return the number of patchable bytes associated with this statepoint. - uint32_t getNumPatchBytes() const { - const Value *NumPatchBytesVal = getCall()->getArgOperand(NumPatchBytesPos); - uint64_t NumPatchBytes = - cast(NumPatchBytesVal)->getZExtValue(); - assert(isInt<32>(NumPatchBytes) && "should fit in 32 bits!"); - return NumPatchBytes; - } /// Return the value actually being called or invoked. ValueTy *getCalledValue() const { @@ -160,12 +195,6 @@ class StatepointBase { return FTy->getReturnType(); } - /// Number of arguments to be passed to the actual callee. 
- int getNumCallArgs() const { - const Value *NumCallArgsVal = getCall()->getArgOperand(NumCallArgsPos); - return cast(NumCallArgsVal)->getZExtValue(); - } - size_t arg_size() const { return getNumCallArgs(); } arg_iterator arg_begin() const { assert(CallArgsBeginPos <= (int)getCall()->arg_size()); @@ -291,9 +320,9 @@ class StatepointBase { /// to a gc.statepoint. class ImmutableStatepoint : public StatepointBase { + const GCStatepointInst> { using Base = StatepointBase; + const GCStatepointInst>; public: explicit ImmutableStatepoint(const Instruction *I) : Base(I) {} @@ -303,8 +332,8 @@ class ImmutableStatepoint /// A specialization of it's base class for read-write access /// to a gc.statepoint. class Statepoint - : public StatepointBase { - using Base = StatepointBase; + : public StatepointBase { + using Base = StatepointBase; public: explicit Statepoint(Instruction *I) : Base(I) {} @@ -402,9 +431,9 @@ class GCResultInst : public GCProjectionInst { }; template + typename CallTy> std::vector -StatepointBase::getRelocates() +StatepointBase::getRelocates() const { std::vector Result; diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index f6435d8b7ccc8a..d7fdc5294a0ab7 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -1139,6 +1139,8 @@ StringRef ELFObjectFile::getFileFormatName() const { return "elf64-amdgpu"; case ELF::EM_BPF: return "elf64-bpf"; + case ELF::EM_VE: + return "elf64-ve"; default: return "elf64-unknown"; } @@ -1217,6 +1219,8 @@ template Triple::ArchType ELFObjectFile::getArch() const { case ELF::EM_BPF: return IsLittleEndian ? 
Triple::bpfel : Triple::bpfeb; + case ELF::EM_VE: + return Triple::ve; default: return Triple::UnknownArch; } diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h index 22ed82289ca8ce..5d3384925631ff 100644 --- a/llvm/include/llvm/ObjectYAML/ELFYAML.h +++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h @@ -86,6 +86,14 @@ struct FileHeader { Optional SHStrNdx; }; +struct SectionHeader { + StringRef Name; +}; + +struct SectionHeaderTable { + std::vector Sections; +}; + struct SectionName { StringRef Section; }; @@ -508,6 +516,7 @@ struct ProgramHeader { struct Object { FileHeader Header; + Optional SectionHeaders; std::vector ProgramHeaders; // An object might contain output section descriptions as well as @@ -539,6 +548,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::LinkerOption) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::CallGraphEntry) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::NoteEntry) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::ProgramHeader) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::SectionHeader) LLVM_YAML_IS_SEQUENCE_VECTOR(std::unique_ptr) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::Symbol) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::VerdefEntry) @@ -670,6 +680,14 @@ struct MappingTraits { static void mapping(IO &IO, ELFYAML::FileHeader &FileHdr); }; +template <> struct MappingTraits { + static void mapping(IO &IO, ELFYAML::SectionHeaderTable &SecHdrTable); +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, ELFYAML::SectionHeader &SHdr); +}; + template <> struct MappingTraits { static void mapping(IO &IO, ELFYAML::ProgramHeader &FileHdr); }; diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index 7ae053f59d1a0f..faf0a3186fd615 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -1,3 +1,5 @@ +add_subdirectory(ML) + add_llvm_component_library(LLVMAnalysis AliasAnalysis.cpp AliasAnalysisEvaluator.cpp diff --git 
a/llvm/lib/Analysis/LLVMBuild.txt b/llvm/lib/Analysis/LLVMBuild.txt index d73b55f037fa3b..ef52c41da8a60c 100644 --- a/llvm/lib/Analysis/LLVMBuild.txt +++ b/llvm/lib/Analysis/LLVMBuild.txt @@ -14,6 +14,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = ML + [component_0] type = Library name = Analysis diff --git a/llvm/lib/Analysis/ML/CMakeLists.txt b/llvm/lib/Analysis/ML/CMakeLists.txt new file mode 100644 index 00000000000000..28a5f98b793b82 --- /dev/null +++ b/llvm/lib/Analysis/ML/CMakeLists.txt @@ -0,0 +1,6 @@ +add_llvm_component_library(LLVMMLPolicies + InlineFeaturesAnalysis.cpp + + DEPENDS + intrinsics_gen + ) diff --git a/llvm/lib/Analysis/ML/InlineFeaturesAnalysis.cpp b/llvm/lib/Analysis/ML/InlineFeaturesAnalysis.cpp new file mode 100644 index 00000000000000..d81e9b3aaf62d1 --- /dev/null +++ b/llvm/lib/Analysis/ML/InlineFeaturesAnalysis.cpp @@ -0,0 +1,28 @@ +#include "llvm/Analysis/ML/InlineFeaturesAnalysis.h" +#include "llvm/IR/Instructions.h" + +using namespace llvm; + +AnalysisKey InlineFeaturesAnalysis::Key; + +InlineFeaturesAnalysis::Result +InlineFeaturesAnalysis::run(const Function &F, FunctionAnalysisManager &FAM) { + Result Ret; + Ret.Uses = ((!F.hasLocalLinkage()) ? 
1 : 0) + F.getNumUses(); + for (const auto &BB : F) { + ++Ret.BasicBlockCount; + if (const auto *BI = dyn_cast(BB.getTerminator())) { + if (BI->isConditional()) + Ret.BlocksReachedFromConditionalInstruction += BI->getNumSuccessors(); + } else if (const auto *SI = dyn_cast(BB.getTerminator())) + Ret.BlocksReachedFromConditionalInstruction += + (SI->getNumCases() + (nullptr != SI->getDefaultDest())); + for (const auto &I : BB) + if (auto *CS = dyn_cast(&I)) { + const auto *Callee = CS->getCalledFunction(); + if (Callee && !Callee->isIntrinsic() && !Callee->isDeclaration()) + ++Ret.DirectCallsToDefinedFunctions; + } + } + return Ret; +} \ No newline at end of file diff --git a/llvm/lib/Analysis/ML/LLVMBuild.txt b/llvm/lib/Analysis/ML/LLVMBuild.txt new file mode 100644 index 00000000000000..a0bb919bb41176 --- /dev/null +++ b/llvm/lib/Analysis/ML/LLVMBuild.txt @@ -0,0 +1,21 @@ +;===- ./lib/Analysis/ML/LLVMBuild.txt --------------------------*- Conf -*--===; +; +; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = MLPolicies +parent = Analysis +required_libraries = Core Support diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp index ef33b9b1de5a36..3360fd4c37c02b 100644 --- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp +++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp @@ -86,28 +86,28 @@ static const ProfileSummaryEntry &getEntryForPercentile(SummaryEntryVector &DS, // The profile summary metadata may be attached either by the frontend or by // any backend passes (IR level instrumentation, for example). This method // checks if the Summary is null and if so checks if the summary metadata is now -// available in the module and parses it to get the Summary object. Returns true -// if a valid Summary is available. -bool ProfileSummaryInfo::computeSummary() { - if (Summary) - return true; +// available in the module and parses it to get the Summary object. +void ProfileSummaryInfo::refresh() { + if (hasProfileSummary()) + return; // First try to get context sensitive ProfileSummary. auto *SummaryMD = M.getProfileSummary(/* IsCS */ true); - if (SummaryMD) { + if (SummaryMD) Summary.reset(ProfileSummary::getFromMD(SummaryMD)); - return true; + + if (!hasProfileSummary()) { + // This will actually return PSK_Instr or PSK_Sample summary. + SummaryMD = M.getProfileSummary(/* IsCS */ false); + if (SummaryMD) + Summary.reset(ProfileSummary::getFromMD(SummaryMD)); } - // This will actually return PSK_Instr or PSK_Sample summary. 
- SummaryMD = M.getProfileSummary(/* IsCS */ false); - if (!SummaryMD) - return false; - Summary.reset(ProfileSummary::getFromMD(SummaryMD)); - return true; + if (!hasProfileSummary()) + return; + computeThresholds(); } -Optional ProfileSummaryInfo::getProfileCount(const CallBase &Call, - BlockFrequencyInfo *BFI, - bool AllowSynthetic) { +Optional ProfileSummaryInfo::getProfileCount( + const CallBase &Call, BlockFrequencyInfo *BFI, bool AllowSynthetic) const { assert((isa(Call) || isa(Call)) && "We can only get profile count for call/invoke instruction."); if (hasSampleProfile()) { @@ -128,8 +128,8 @@ Optional ProfileSummaryInfo::getProfileCount(const CallBase &Call, /// Returns true if the function's entry is hot. If it returns false, it /// either means it is not hot or it is unknown whether it is hot or not (for /// example, no profile data is available). -bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) { - if (!F || !computeSummary()) +bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) const { + if (!F || !hasProfileSummary()) return false; auto FunctionCount = F->getEntryCount(); // FIXME: The heuristic used below for determining hotness is based on @@ -143,9 +143,9 @@ bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) { /// hot total call edge count. /// If it returns false, it either means it is not hot or it is unknown /// (for example, no profile data is available). -bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F, - BlockFrequencyInfo &BFI) { - if (!F || !computeSummary()) +bool ProfileSummaryInfo::isFunctionHotInCallGraph( + const Function *F, BlockFrequencyInfo &BFI) const { + if (!F || !hasProfileSummary()) return false; if (auto FunctionCount = F->getEntryCount()) if (isHotCount(FunctionCount.getCount())) @@ -172,9 +172,9 @@ bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F, /// the total call edge count is cold. 
/// If it returns false, it either means it is not cold or it is unknown /// (for example, no profile data is available). -bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F, - BlockFrequencyInfo &BFI) { - if (!F || !computeSummary()) +bool ProfileSummaryInfo::isFunctionColdInCallGraph( + const Function *F, BlockFrequencyInfo &BFI) const { + if (!F || !hasProfileSummary()) return false; if (auto FunctionCount = F->getEntryCount()) if (!isColdCount(FunctionCount.getCount())) @@ -196,15 +196,15 @@ bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F, return true; } -bool ProfileSummaryInfo::isFunctionHotnessUnknown(const Function &F) { +bool ProfileSummaryInfo::isFunctionHotnessUnknown(const Function &F) const { assert(hasPartialSampleProfile() && "Expect partial sample profile"); return !F.getEntryCount().hasValue(); } -template +template bool ProfileSummaryInfo::isFunctionHotOrColdInCallGraphNthPercentile( - int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) { - if (!F || !computeSummary()) + int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) const { + if (!F || !hasProfileSummary()) return false; if (auto FunctionCount = F->getEntryCount()) { if (isHot && @@ -237,13 +237,13 @@ bool ProfileSummaryInfo::isFunctionHotOrColdInCallGraphNthPercentile( // Like isFunctionHotInCallGraph but for a given cutoff. 
bool ProfileSummaryInfo::isFunctionHotInCallGraphNthPercentile( - int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) { + int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) const { return isFunctionHotOrColdInCallGraphNthPercentile( PercentileCutoff, F, BFI); } bool ProfileSummaryInfo::isFunctionColdInCallGraphNthPercentile( - int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) { + int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) const { return isFunctionHotOrColdInCallGraphNthPercentile( PercentileCutoff, F, BFI); } @@ -251,12 +251,12 @@ bool ProfileSummaryInfo::isFunctionColdInCallGraphNthPercentile( /// Returns true if the function's entry is a cold. If it returns false, it /// either means it is not cold or it is unknown whether it is cold or not (for /// example, no profile data is available). -bool ProfileSummaryInfo::isFunctionEntryCold(const Function *F) { +bool ProfileSummaryInfo::isFunctionEntryCold(const Function *F) const { if (!F) return false; if (F->hasFnAttribute(Attribute::Cold)) return true; - if (!computeSummary()) + if (!hasProfileSummary()) return false; auto FunctionCount = F->getEntryCount(); // FIXME: The heuristic used below for determining coldness is based on @@ -267,8 +267,6 @@ bool ProfileSummaryInfo::isFunctionEntryCold(const Function *F) { /// Compute the hot and cold thresholds. 
void ProfileSummaryInfo::computeThresholds() { - if (!computeSummary()) - return; auto &DetailedSummary = Summary->getDetailedSummary(); auto &HotEntry = getEntryForPercentile(DetailedSummary, ProfileSummaryCutoffHot); @@ -288,8 +286,9 @@ void ProfileSummaryInfo::computeThresholds() { HotEntry.NumCounts > ProfileSummaryLargeWorkingSetSizeThreshold; } -Optional ProfileSummaryInfo::computeThreshold(int PercentileCutoff) { - if (!computeSummary()) +Optional +ProfileSummaryInfo::computeThreshold(int PercentileCutoff) const { + if (!hasProfileSummary()) return None; auto iter = ThresholdCache.find(PercentileCutoff); if (iter != ThresholdCache.end()) { @@ -303,33 +302,25 @@ Optional ProfileSummaryInfo::computeThreshold(int PercentileCutoff) { return CountThreshold; } -bool ProfileSummaryInfo::hasHugeWorkingSetSize() { - if (!HasHugeWorkingSetSize) - computeThresholds(); +bool ProfileSummaryInfo::hasHugeWorkingSetSize() const { return HasHugeWorkingSetSize && HasHugeWorkingSetSize.getValue(); } -bool ProfileSummaryInfo::hasLargeWorkingSetSize() { - if (!HasLargeWorkingSetSize) - computeThresholds(); +bool ProfileSummaryInfo::hasLargeWorkingSetSize() const { return HasLargeWorkingSetSize && HasLargeWorkingSetSize.getValue(); } -bool ProfileSummaryInfo::isHotCount(uint64_t C) { - if (!HotCountThreshold) - computeThresholds(); +bool ProfileSummaryInfo::isHotCount(uint64_t C) const { return HotCountThreshold && C >= HotCountThreshold.getValue(); } -bool ProfileSummaryInfo::isColdCount(uint64_t C) { - if (!ColdCountThreshold) - computeThresholds(); +bool ProfileSummaryInfo::isColdCount(uint64_t C) const { return ColdCountThreshold && C <= ColdCountThreshold.getValue(); } -template +template bool ProfileSummaryInfo::isHotOrColdCountNthPercentile(int PercentileCutoff, - uint64_t C) { + uint64_t C) const { auto CountThreshold = computeThreshold(PercentileCutoff); if (isHot) return CountThreshold && C >= CountThreshold.getValue(); @@ -337,41 +328,39 @@ bool 
ProfileSummaryInfo::isHotOrColdCountNthPercentile(int PercentileCutoff, return CountThreshold && C <= CountThreshold.getValue(); } -bool ProfileSummaryInfo::isHotCountNthPercentile(int PercentileCutoff, uint64_t C) { +bool ProfileSummaryInfo::isHotCountNthPercentile(int PercentileCutoff, + uint64_t C) const { return isHotOrColdCountNthPercentile(PercentileCutoff, C); } -bool ProfileSummaryInfo::isColdCountNthPercentile(int PercentileCutoff, uint64_t C) { +bool ProfileSummaryInfo::isColdCountNthPercentile(int PercentileCutoff, + uint64_t C) const { return isHotOrColdCountNthPercentile(PercentileCutoff, C); } -uint64_t ProfileSummaryInfo::getOrCompHotCountThreshold() { - if (!HotCountThreshold) - computeThresholds(); +uint64_t ProfileSummaryInfo::getOrCompHotCountThreshold() const { return HotCountThreshold ? HotCountThreshold.getValue() : UINT64_MAX; } -uint64_t ProfileSummaryInfo::getOrCompColdCountThreshold() { - if (!ColdCountThreshold) - computeThresholds(); +uint64_t ProfileSummaryInfo::getOrCompColdCountThreshold() const { return ColdCountThreshold ? 
ColdCountThreshold.getValue() : 0; } -bool ProfileSummaryInfo::isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) { +bool ProfileSummaryInfo::isHotBlock(const BasicBlock *BB, + BlockFrequencyInfo *BFI) const { auto Count = BFI->getBlockProfileCount(BB); return Count && isHotCount(*Count); } bool ProfileSummaryInfo::isColdBlock(const BasicBlock *BB, - BlockFrequencyInfo *BFI) { + BlockFrequencyInfo *BFI) const { auto Count = BFI->getBlockProfileCount(BB); return Count && isColdCount(*Count); } -template -bool ProfileSummaryInfo::isHotOrColdBlockNthPercentile(int PercentileCutoff, - const BasicBlock *BB, - BlockFrequencyInfo *BFI) { +template +bool ProfileSummaryInfo::isHotOrColdBlockNthPercentile( + int PercentileCutoff, const BasicBlock *BB, BlockFrequencyInfo *BFI) const { auto Count = BFI->getBlockProfileCount(BB); if (isHot) return Count && isHotCountNthPercentile(PercentileCutoff, *Count); @@ -379,26 +368,24 @@ bool ProfileSummaryInfo::isHotOrColdBlockNthPercentile(int PercentileCutoff, return Count && isColdCountNthPercentile(PercentileCutoff, *Count); } -bool ProfileSummaryInfo::isHotBlockNthPercentile(int PercentileCutoff, - const BasicBlock *BB, - BlockFrequencyInfo *BFI) { +bool ProfileSummaryInfo::isHotBlockNthPercentile( + int PercentileCutoff, const BasicBlock *BB, BlockFrequencyInfo *BFI) const { return isHotOrColdBlockNthPercentile(PercentileCutoff, BB, BFI); } -bool ProfileSummaryInfo::isColdBlockNthPercentile(int PercentileCutoff, - const BasicBlock *BB, - BlockFrequencyInfo *BFI) { +bool ProfileSummaryInfo::isColdBlockNthPercentile( + int PercentileCutoff, const BasicBlock *BB, BlockFrequencyInfo *BFI) const { return isHotOrColdBlockNthPercentile(PercentileCutoff, BB, BFI); } bool ProfileSummaryInfo::isHotCallSite(const CallBase &CB, - BlockFrequencyInfo *BFI) { + BlockFrequencyInfo *BFI) const { auto C = getProfileCount(CB, BFI); return C && isHotCount(*C); } bool ProfileSummaryInfo::isColdCallSite(const CallBase &CB, - BlockFrequencyInfo 
*BFI) { + BlockFrequencyInfo *BFI) const { auto C = getProfileCount(CB, BFI); if (C) return isColdCount(*C); @@ -408,7 +395,7 @@ bool ProfileSummaryInfo::isColdCallSite(const CallBase &CB, return hasSampleProfile() && CB.getCaller()->hasProfileData(); } -bool ProfileSummaryInfo::hasPartialSampleProfile() { +bool ProfileSummaryInfo::hasPartialSampleProfile() const { return hasProfileSummary() && Summary->getKind() == ProfileSummary::PSK_Sample && (PartialProfile || Summary->isPartialProfile()); diff --git a/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/llvm/lib/Analysis/StackSafetyAnalysis.cpp index a44732613c0fa7..6eeffe6066dfc7 100644 --- a/llvm/lib/Analysis/StackSafetyAnalysis.cpp +++ b/llvm/lib/Analysis/StackSafetyAnalysis.cpp @@ -33,6 +33,8 @@ static cl::opt StackSafetyMaxIterations("stack-safety-max-iterations", namespace { +using GVToSSI = StackSafetyGlobalInfo::GVToSSI; + /// Rewrite an SCEV expression for a memory access address to an expression that /// represents offset from the given alloca. class AllocaOffsetRewriter : public SCEVRewriteVisitor { @@ -85,7 +87,11 @@ struct UseInfo { explicit UseInfo(unsigned PointerSize) : Range{PointerSize, false} {} - void updateRange(ConstantRange R) { Range = Range.unionWith(R); } + void updateRange(const ConstantRange &R) { + assert(!R.isUpperSignWrapped()); + Range = Range.unionWith(R); + assert(!Range.isUpperSignWrapped()); + } }; raw_ostream &operator<<(raw_ostream &OS, const UseInfo &U) { @@ -95,21 +101,40 @@ raw_ostream &operator<<(raw_ostream &OS, const UseInfo &U) { return OS; } -/// Calculate the allocation size of a given alloca. Returns 0 if the -/// size can not be statically determined. -uint64_t getStaticAllocaAllocationSize(const AllocaInst *AI) { - const DataLayout &DL = AI->getModule()->getDataLayout(); - TypeSize TS = DL.getTypeAllocSize(AI->getAllocatedType()); +// Check if we should bailout for such ranges. 
+bool isUnsafe(const ConstantRange &R) { + return R.isEmptySet() || R.isFullSet() || R.isUpperSignWrapped(); +} + +/// Calculate the allocation size of a given alloca. Returns empty range +// in case of confution. +ConstantRange getStaticAllocaSizeRange(const AllocaInst &AI) { + const DataLayout &DL = AI.getModule()->getDataLayout(); + TypeSize TS = DL.getTypeAllocSize(AI.getAllocatedType()); + unsigned PointerSize = DL.getMaxPointerSizeInBits(); + // Fallback to empty range for alloca size. + ConstantRange R = ConstantRange::getEmpty(PointerSize); if (TS.isScalable()) - return 0; - uint64_t Size = TS.getFixedSize(); - if (AI->isArrayAllocation()) { - auto C = dyn_cast(AI->getArraySize()); + return R; + APInt APSize(PointerSize, TS.getFixedSize(), true); + if (APSize.isNonPositive()) + return R; + if (AI.isArrayAllocation()) { + auto C = dyn_cast(AI.getArraySize()); if (!C) - return 0; - Size *= C->getZExtValue(); + return R; + bool Overflow = false; + APInt Mul = C->getValue(); + if (Mul.isNonPositive()) + return R; + Mul = Mul.sextOrTrunc(PointerSize); + APSize = APSize.smul_ov(Mul, Overflow); + if (Overflow) + return R; } - return Size; + R = ConstantRange(APInt::getNullValue(PointerSize), APSize); + assert(!isUnsafe(R)); + return R; } /// Describes uses of allocas and parameters inside of a single function. 
@@ -155,7 +180,7 @@ struct FunctionInfo { if (auto AI = dyn_cast(&I)) { auto &AS = Allocas[Pos]; O << " " << AI->getName() << "[" - << getStaticAllocaAllocationSize(AI) << "]: " << AS << "\n"; + << getStaticAllocaSizeRange(*AI).getUpper() << "]: " << AS << "\n"; ++Pos; } } @@ -206,10 +231,6 @@ class StackSafetyLocalAnalysis { bool analyzeAllUses(Value *Ptr, UseInfo &AS); - ConstantRange getRange(uint64_t Lower, uint64_t Upper) const { - return ConstantRange(APInt(PointerSize, Lower), APInt(PointerSize, Upper)); - } - public: StackSafetyLocalAnalysis(Function &F, ScalarEvolution &SE) : F(F), DL(F.getParent()->getDataLayout()), SE(SE), @@ -227,7 +248,7 @@ ConstantRange StackSafetyLocalAnalysis::offsetFrom(Value *Addr, Value *Base) { AllocaOffsetRewriter Rewriter(SE, Base); const SCEV *Expr = Rewriter.visit(SE.getSCEV(Addr)); ConstantRange Offset = SE.getSignedRange(Expr); - if (Offset.isEmptySet() || Offset.isFullSet() || Offset.isSignWrappedSet()) + if (isUnsafe(Offset)) return UnknownRange; return Offset.sextOrTrunc(PointerSize); } @@ -238,18 +259,30 @@ StackSafetyLocalAnalysis::getAccessRange(Value *Addr, Value *Base, // Zero-size loads and stores do not access memory. if (SizeRange.isEmptySet()) return ConstantRange::getEmpty(PointerSize); + assert(!isUnsafe(SizeRange)); + + ConstantRange Offsets = offsetFrom(Addr, Base); + if (isUnsafe(Offsets)) + return UnknownRange; - ConstantRange AccessStartRange = offsetFrom(Addr, Base); - ConstantRange AccessRange = AccessStartRange.add(SizeRange); - assert(!AccessRange.isEmptySet()); - return AccessRange; + if (Offsets.signedAddMayOverflow(SizeRange) != + ConstantRange::OverflowResult::NeverOverflows) + return UnknownRange; + Offsets = Offsets.add(SizeRange); + if (isUnsafe(Offsets)) + return UnknownRange; + return Offsets; } ConstantRange StackSafetyLocalAnalysis::getAccessRange(Value *Addr, Value *Base, TypeSize Size) { - ConstantRange SizeRange = - Size.isScalable() ? 
UnknownRange : getRange(0, Size.getFixedSize()); - return getAccessRange(Addr, Base, SizeRange); + if (Size.isScalable()) + return UnknownRange; + APInt APSize(PointerSize, Size.getFixedSize(), true); + if (APSize.isNegative()) + return UnknownRange; + return getAccessRange( + Addr, Base, ConstantRange(APInt::getNullValue(PointerSize), APSize)); } ConstantRange StackSafetyLocalAnalysis::getMemIntrinsicAccessRange( @@ -261,20 +294,19 @@ ConstantRange StackSafetyLocalAnalysis::getMemIntrinsicAccessRange( if (MI->getRawDest() != U) return ConstantRange::getEmpty(PointerSize); } + auto *CalculationTy = IntegerType::getIntNTy(SE.getContext(), PointerSize); if (!SE.isSCEVable(MI->getLength()->getType())) return UnknownRange; const SCEV *Expr = SE.getTruncateOrZeroExtend(SE.getSCEV(MI->getLength()), CalculationTy); - ConstantRange LenRange = SE.getSignedRange(Expr); - assert(!LenRange.isEmptySet()); - if (LenRange.isSignWrappedSet() || LenRange.isFullSet() || - LenRange.getUpper().isNegative()) + ConstantRange Sizes = SE.getSignedRange(Expr); + if (Sizes.getUpper().isNegative() || isUnsafe(Sizes)) return UnknownRange; - LenRange = LenRange.sextOrTrunc(PointerSize); + Sizes = Sizes.sextOrTrunc(PointerSize); ConstantRange SizeRange(APInt::getNullValue(PointerSize), - LenRange.getUpper() - 1); + Sizes.getUpper() - 1); return getAccessRange(U, Base, SizeRange); } @@ -427,7 +459,7 @@ class StackSafetyDataFlowAnalysis { public: StackSafetyDataFlowAnalysis( Module &M, std::function FI); - StackSafetyGlobalInfo run(); + GVToSSI run(); }; StackSafetyDataFlowAnalysis::StackSafetyDataFlowAnalysis( @@ -541,19 +573,18 @@ void StackSafetyDataFlowAnalysis::verifyFixedPoint() { } #endif -StackSafetyGlobalInfo StackSafetyDataFlowAnalysis::run() { +GVToSSI StackSafetyDataFlowAnalysis::run() { runDataFlow(); LLVM_DEBUG(verifyFixedPoint()); - StackSafetyGlobalInfo SSI; + GVToSSI SSI; for (auto &F : Functions) SSI.emplace(F.first, makeSSI(F.second)); return SSI; } -bool 
setStackSafetyMetadata(Module &M, const StackSafetyGlobalInfo &SSGI) { +bool setStackSafetyMetadata(Module &M, const GVToSSI &SSGI) { bool Changed = false; - unsigned Width = M.getDataLayout().getPointerSizeInBits(); for (auto &F : M.functions()) { if (F.isDeclaration() || F.hasOptNone()) continue; @@ -565,9 +596,7 @@ bool setStackSafetyMetadata(Module &M, const StackSafetyGlobalInfo &SSGI) { for (auto &I : instructions(F)) { if (auto AI = dyn_cast(&I)) { auto &AS = Summary.Allocas[Pos]; - ConstantRange AllocaRange{ - APInt(Width, 0), APInt(Width, getStaticAllocaAllocationSize(AI))}; - if (AllocaRange.contains(AS.Range)) { + if (getStaticAllocaSizeRange(*AI).contains(AS.Range)) { AI->setMetadata(M.getMDKindID("stack-safe"), MDNode::get(M.getContext(), None)); Changed = true; @@ -593,23 +622,28 @@ void StackSafetyInfo::print(raw_ostream &O, const GlobalValue &F) const { Info->Info.print(O, F.getName(), dyn_cast(&F)); } -static void print(const StackSafetyGlobalInfo &SSI, raw_ostream &O, - const Module &M) { - size_t Count = 0; - for (auto &F : M.functions()) +bool StackSafetyGlobalInfo::setMetadata(Module &M) const { + return setStackSafetyMetadata(M, SSGI); +} + +void StackSafetyGlobalInfo::print(raw_ostream &O) const { + if (SSGI.empty()) + return; + const Module &M = *SSGI.begin()->first->getParent(); + for (auto &F : M.functions()) { if (!F.isDeclaration()) { - SSI.find(&F)->second.print(O, F); + SSGI.find(&F)->second.print(O, F); O << "\n"; - ++Count; } + } for (auto &A : M.aliases()) { - SSI.find(&A)->second.print(O, A); + SSGI.find(&A)->second.print(O, A); O << "\n"; - ++Count; } - assert(Count == SSI.size() && "Unexpected functions in the result"); } +LLVM_DUMP_METHOD void StackSafetyGlobalInfo::dump() const { print(dbgs()); } + AnalysisKey StackSafetyAnalysis::Key; StackSafetyInfo StackSafetyAnalysis::run(Function &F, @@ -665,14 +699,14 @@ StackSafetyGlobalAnalysis::run(Module &M, ModuleAnalysisManager &AM) { PreservedAnalyses 
StackSafetyGlobalPrinterPass::run(Module &M, ModuleAnalysisManager &AM) { OS << "'Stack Safety Analysis' for module '" << M.getName() << "'\n"; - print(AM.getResult(M), OS, M); + AM.getResult(M).print(OS); return PreservedAnalyses::all(); } PreservedAnalyses StackSafetyGlobalAnnotatorPass::run(Module &M, ModuleAnalysisManager &AM) { auto &SSGI = AM.getResult(M); - (void)setStackSafetyMetadata(M, SSGI); + SSGI.setMetadata(M); return PreservedAnalyses::all(); } @@ -687,7 +721,7 @@ StackSafetyGlobalInfoWrapperPass::StackSafetyGlobalInfoWrapperPass( void StackSafetyGlobalInfoWrapperPass::print(raw_ostream &O, const Module *M) const { - ::print(SSGI, O, *M); + SSGI.print(O); } void StackSafetyGlobalInfoWrapperPass::getAnalysisUsage( @@ -704,7 +738,7 @@ bool StackSafetyGlobalInfoWrapperPass::runOnModule(Module &M) { .Info; }); SSGI = SSDFA.run(); - return SetMetadata ? setStackSafetyMetadata(M, SSGI) : false; + return SetMetadata ? SSGI.setMetadata(M) : false; } ModulePass *llvm::createStackSafetyGlobalInfoWrapperPass(bool SetMetadata) { diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 9e8fe96ac3a83e..a2c1b3f632af81 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -4500,21 +4500,41 @@ bool LLParser::ParseGenericDINode(MDNode *&Result, bool IsDistinct) { /// ParseDISubrange: /// ::= !DISubrange(count: 30, lowerBound: 2) /// ::= !DISubrange(count: !node, lowerBound: 2) +/// ::= !DISubrange(lowerBound: !node1, upperBound: !node2, stride: !node3) bool LLParser::ParseDISubrange(MDNode *&Result, bool IsDistinct) { #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ - REQUIRED(count, MDSignedOrMDField, (-1, -1, INT64_MAX, false)); \ - OPTIONAL(lowerBound, MDSignedField, ); + OPTIONAL(count, MDSignedOrMDField, (-1, -1, INT64_MAX, false)); \ + OPTIONAL(lowerBound, MDSignedOrMDField, ); \ + OPTIONAL(upperBound, MDSignedOrMDField, ); \ + OPTIONAL(stride, MDSignedOrMDField, ); PARSE_MD_FIELDS(); #undef 
VISIT_MD_FIELDS + Metadata *Count = nullptr; + Metadata *LowerBound = nullptr; + Metadata *UpperBound = nullptr; + Metadata *Stride = nullptr; if (count.isMDSignedField()) - Result = GET_OR_DISTINCT( - DISubrange, (Context, count.getMDSignedValue(), lowerBound.Val)); + Count = ConstantAsMetadata::get(ConstantInt::getSigned( + Type::getInt64Ty(Context), count.getMDSignedValue())); else if (count.isMDField()) - Result = GET_OR_DISTINCT( - DISubrange, (Context, count.getMDFieldValue(), lowerBound.Val)); - else - return true; + Count = count.getMDFieldValue(); + + auto convToMetadata = [&](MDSignedOrMDField Bound) -> Metadata * { + if (Bound.isMDSignedField()) + return ConstantAsMetadata::get(ConstantInt::getSigned( + Type::getInt64Ty(Context), Bound.getMDSignedValue())); + if (Bound.isMDField()) + return Bound.getMDFieldValue(); + return nullptr; + }; + + LowerBound = convToMetadata(lowerBound); + UpperBound = convToMetadata(upperBound); + Stride = convToMetadata(stride); + + Result = GET_OR_DISTINCT(DISubrange, + (Context, Count, LowerBound, UpperBound, Stride)); return false; } diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index 7338d170cb48b0..34c93beebb0132 100644 --- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -1258,14 +1258,24 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( // Operand 'count' is interpreted as: // - Signed integer (version 0) // - Metadata node (version 1) + // Operand 'lowerBound' is interpreted as: + // - Signed integer (version 0 and 1) + // - Metadata node (version 2) + // Operands 'upperBound' and 'stride' are interpreted as: + // - Metadata node (version 2) switch (Record[0] >> 1) { case 0: Val = GET_OR_DISTINCT(DISubrange, - (Context, Record[1], unrotateSign(Record.back()))); + (Context, Record[1], unrotateSign(Record[2]))); break; case 1: Val = GET_OR_DISTINCT(DISubrange, (Context, getMDOrNull(Record[1]), - 
unrotateSign(Record.back()))); + unrotateSign(Record[2]))); + break; + case 2: + Val = GET_OR_DISTINCT( + DISubrange, (Context, getMDOrNull(Record[1]), getMDOrNull(Record[2]), + getMDOrNull(Record[3]), getMDOrNull(Record[4]))); break; default: return error("Invalid record: Unsupported version of DISubrange"); diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index fd4275f7d569a7..a46339a4ec1fd5 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -1519,18 +1519,15 @@ void ModuleBitcodeWriter::writeGenericDINode(const GenericDINode *N, Record.clear(); } -static uint64_t rotateSign(int64_t I) { - uint64_t U = I; - return I < 0 ? ~(U << 1) : U << 1; -} - void ModuleBitcodeWriter::writeDISubrange(const DISubrange *N, SmallVectorImpl &Record, unsigned Abbrev) { - const uint64_t Version = 1 << 1; + const uint64_t Version = 2 << 1; Record.push_back((uint64_t)N->isDistinct() | Version); Record.push_back(VE.getMetadataOrNullID(N->getRawCountNode())); - Record.push_back(rotateSign(N->getLowerBound())); + Record.push_back(VE.getMetadataOrNullID(N->getRawLowerBound())); + Record.push_back(VE.getMetadataOrNullID(N->getRawUpperBound())); + Record.push_back(VE.getMetadataOrNullID(N->getRawStride())); Stream.EmitRecord(bitc::METADATA_SUBRANGE, Record, Abbrev); Record.clear(); diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index de2b9bcc58c7e5..f7041c0cc92631 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -1592,7 +1592,7 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) { assert(Element->getTag() == dwarf::DW_TAG_subrange_type); const DISubrange *Subrange = cast(Element); - assert(Subrange->getLowerBound() == 0 && + assert(!Subrange->getRawLowerBound() && "codeview doesn't support subranges with lower bounds"); int64_t Count = -1; if 
(auto *CI = Subrange->getCount().dyn_cast()) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index dce90b3c17c0df..8d6849b4e1e353 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -768,9 +768,18 @@ static SmallVector dependencies(DbgVariable *Var) { Result.push_back(DLVar); for (auto *El : Array->getElements()) { if (auto *Subrange = dyn_cast(El)) { - auto Count = Subrange->getCount(); - if (auto *Dependency = Count.dyn_cast()) - Result.push_back(Dependency); + if (auto Count = Subrange->getCount()) + if (auto *Dependency = Count.dyn_cast()) + Result.push_back(Dependency); + if (auto LB = Subrange->getLowerBound()) + if (auto *Dependency = LB.dyn_cast()) + Result.push_back(Dependency); + if (auto UB = Subrange->getUpperBound()) + if (auto *Dependency = UB.dyn_cast()) + Result.push_back(Dependency); + if (auto ST = Subrange->getStride()) + if (auto *Dependency = ST.dyn_cast()) + Result.push_back(Dependency); } } return Result; diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 62bf51d4220670..e958f38e486b04 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1349,20 +1349,40 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, // C/C++. The Count value is the number of elements. Values are 64 bit. If // Count == -1 then the array is unbounded and we do not emit // DW_AT_lower_bound and DW_AT_count attributes. 
- int64_t LowerBound = SR->getLowerBound(); int64_t DefaultLowerBound = getDefaultLowerBound(); int64_t Count = -1; if (auto *CI = SR->getCount().dyn_cast()) Count = CI->getSExtValue(); - if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound) - addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, None, LowerBound); + auto addBoundTypeEntry = [&](dwarf::Attribute Attr, + DISubrange::BoundType Bound) -> void { + if (auto *BV = Bound.dyn_cast()) { + if (auto *VarDIE = getDIE(BV)) + addDIEEntry(DW_Subrange, Attr, *VarDIE); + } else if (auto *BE = Bound.dyn_cast()) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc); + DwarfExpr.setMemoryLocationKind(); + DwarfExpr.addExpression(BE); + addBlock(DW_Subrange, Attr, DwarfExpr.finalize()); + } else if (auto *BI = Bound.dyn_cast()) { + if (Attr != dwarf::DW_AT_lower_bound || DefaultLowerBound == -1 || + BI->getSExtValue() != DefaultLowerBound) + addSInt(DW_Subrange, Attr, dwarf::DW_FORM_sdata, BI->getSExtValue()); + } + }; + + addBoundTypeEntry(dwarf::DW_AT_lower_bound, SR->getLowerBound()); if (auto *CV = SR->getCount().dyn_cast()) { if (auto *CountVarDIE = getDIE(CV)) addDIEEntry(DW_Subrange, dwarf::DW_AT_count, *CountVarDIE); } else if (Count != -1) addUInt(DW_Subrange, dwarf::DW_AT_count, None, Count); + + addBoundTypeEntry(dwarf::DW_AT_upper_bound, SR->getUpperBound()); + + addBoundTypeEntry(dwarf::DW_AT_byte_stride, SR->getStride()); } DIE *DwarfUnit::getIndexTyDie() { diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index d4c471afb3b616..ee4b43446ee1cf 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -576,7 +576,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { SmallVector Statepoints; for (BasicBlock &BB : F) for (Instruction &I : BB) - if (isStatepoint(I)) + if (isa(I)) Statepoints.push_back(&I); for (auto &I : Statepoints) EverMadeChange |= simplifyOffsetableRelocate(*I); diff 
--git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 1d888245af9fb5..45b7d991ae727e 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1534,8 +1534,28 @@ bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1, if (!I2) return false; - // Check for physical registers on the instructions first to avoid cases like - // this: + // If we have an instruction which loads or stores, we can't guarantee that + // it is identical. + // + // For example, we may have + // + // %x1 = G_LOAD %addr (load N from @somewhere) + // ... + // call @foo + // ... + // %x2 = G_LOAD %addr (load N from @somewhere) + // ... + // %or = G_OR %x1, %x2 + // + // It's possible that @foo will modify whatever lives at the address we're + // loading from. To be safe, let's just assume that all loads and stores + // are different (unless we have something which is guaranteed to not + // change.) + if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr)) + return false; + + // Check for physical registers on the instructions first to avoid cases + // like this: // // %a = COPY $physreg // ... diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 2ffcc859f80511..9969786d8d43ad 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1600,9 +1600,13 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); unsigned Align = cast(Tmp3)->getZExtValue(); - unsigned StackAlign = - DAG.getSubtarget().getFrameLowering()->getStackAlignment(); - Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value + const TargetFrameLowering *TFL = DAG.getSubtarget().getFrameLowering(); + unsigned Opc = + TFL->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp ? 
+ ISD::ADD : ISD::SUB; + + unsigned StackAlign = TFL->getStackAlignment(); + Tmp1 = DAG.getNode(Opc, dl, VT, SP, Size); // Value if (Align > StackAlign) Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, DAG.getConstant(-(uint64_t)Align, dl, VT)); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index c1b4d7431ca8f2..d40f7f92c4cb0f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1134,7 +1134,7 @@ void SelectionDAGBuilder::visit(const Instruction &I) { } if (!I.isTerminator() && !HasTailCall && - !isStatepoint(&I)) // statepoints handle their exports internally + !isa(I)) // statepoints handle their exports internally CopyToExportRegsIfNeeded(&I); CurInst = nullptr; @@ -2796,7 +2796,7 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { visitPatchpoint(I, EHPadBB); break; case Intrinsic::experimental_gc_statepoint: - LowerStatepoint(ImmutableStatepoint(&I), EHPadBB); + LowerStatepoint(cast(I), EHPadBB); break; case Intrinsic::wasm_rethrow_in_catch: { // This is usually done in visitTargetIntrinsic, but this intrinsic is @@ -2827,7 +2827,7 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { // available as a virtual register. // We already took care of the exported value for the statepoint instruction // during call to the LowerStatepoint. 
- if (!isStatepoint(I)) { + if (!isa(I)) { CopyToExportRegsIfNeeded(&I); } @@ -6637,7 +6637,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, visitPatchpoint(I); return; case Intrinsic::experimental_gc_statepoint: - LowerStatepoint(ImmutableStatepoint(&I)); + LowerStatepoint(cast(I)); return; case Intrinsic::experimental_gc_result: visitGCResult(cast(I)); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 0924939cc31061..f0b7fb0d52299a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -625,7 +625,7 @@ class SelectionDAGBuilder { // This function is responsible for the whole statepoint lowering process. // It uniformly handles invoke and call statepoints. - void LowerStatepoint(ImmutableStatepoint ISP, + void LowerStatepoint(const GCStatepointInst &I, const BasicBlock *EHPadBB = nullptr); void LowerCallSiteWithDeoptBundle(const CallBase *Call, SDValue Callee, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 279e53877dc8e0..a0cfd3eb729f01 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1518,8 +1518,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // to keep track of gc-relocates for a particular gc-statepoint. This is // done by SelectionDAGBuilder::LowerAsSTATEPOINT, called before // visitGCRelocate. 
- if (isa(Inst) && !isStatepoint(Inst) && !isGCRelocate(Inst) && - !isGCResult(Inst)) { + if (isa(Inst) && !isa(Inst) && + !isa(Inst) && !isa(Inst)) { OptimizationRemarkMissed R("sdagisel", "FastISelFailure", Inst->getDebugLoc(), LLVMBB); diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 9a35bd41d11671..acb68405470ca8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -804,9 +804,10 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( } void -SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, +SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I, const BasicBlock *EHPadBB /*= nullptr*/) { - assert(ISP.getCall()->getCallingConv() != CallingConv::AnyReg && + ImmutableStatepoint ISP(&I); + assert(I.getCallingConv() != CallingConv::AnyReg && "anyregcc is not supported on statepoints!"); #ifndef NDEBUG @@ -823,7 +824,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, SDValue ActualCallee; SDValue Callee = getValue(ISP.getCalledValue()); - if (ISP.getNumPatchBytes() > 0) { + if (I.getNumPatchBytes() > 0) { // If we've been asked to emit a nop sequence instead of a call instruction // for this statepoint then don't lower the call target, but use a constant // `undef` instead. 
Not lowering the call target lets statepoint clients @@ -835,9 +836,8 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, } StatepointLoweringInfo SI(DAG); - populateCallLoweringInfo(SI.CLI, ISP.getCall(), - ImmutableStatepoint::CallArgsBeginPos, - ISP.getNumCallArgs(), ActualCallee, + populateCallLoweringInfo(SI.CLI, &I, GCStatepointInst::CallArgsBeginPos, + I.getNumCallArgs(), ActualCallee, ISP.getActualReturnType(), false /* IsPatchPoint */); // There may be duplication in the gc.relocate list; such as two copies of @@ -865,10 +865,10 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, } SI.GCArgs = ArrayRef(ISP.gc_args_begin(), ISP.gc_args_end()); - SI.StatepointInstr = ISP.getInstruction(); - SI.ID = ISP.getID(); + SI.StatepointInstr = &I; + SI.ID = I.getID(); - if (auto Opt = ISP.getCall()->getOperandBundle(LLVMContext::OB_deopt)) { + if (auto Opt = I.getOperandBundle(LLVMContext::OB_deopt)) { assert(ISP.deopt_begin() == ISP.deopt_end() && "can't list both deopt operands and deopt bundle"); auto &Inputs = Opt->Inputs; @@ -876,7 +876,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, } else { SI.DeoptState = ArrayRef(ISP.deopt_begin(), ISP.deopt_end()); } - if (auto Opt = ISP.getCall()->getOperandBundle(LLVMContext::OB_gc_transition)) { + if (auto Opt = I.getOperandBundle(LLVMContext::OB_gc_transition)) { assert(ISP.gc_transition_args_begin() == ISP.gc_transition_args_end() && "can't list both gc_transition operands and bundle"); auto &Inputs = Opt->Inputs; @@ -886,8 +886,8 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, ISP.gc_transition_args_end()); } - SI.StatepointFlags = ISP.getFlags(); - SI.NumPatchBytes = ISP.getNumPatchBytes(); + SI.StatepointFlags = I.getFlags(); + SI.NumPatchBytes = I.getNumPatchBytes(); SI.EHPadBB = EHPadBB; SDValue ReturnValue = LowerAsSTATEPOINT(SI); @@ -896,7 +896,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, const GCResultInst *GCResult = 
ISP.getGCResult(); Type *RetTy = ISP.getActualReturnType(); if (!RetTy->isVoidTy() && GCResult) { - if (GCResult->getParent() != ISP.getCall()->getParent()) { + if (GCResult->getParent() != I.getParent()) { // Result value will be used in a different basic block so we need to // export it now. Default exporting mechanism will not work here because // statepoint call has a different type than the actual call. It means @@ -908,22 +908,22 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, unsigned Reg = FuncInfo.CreateRegs(RetTy); RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Reg, RetTy, - ISP.getCall()->getCallingConv()); + I.getCallingConv()); SDValue Chain = DAG.getEntryNode(); RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr); PendingExports.push_back(Chain); - FuncInfo.ValueMap[ISP.getInstruction()] = Reg; + FuncInfo.ValueMap[&I] = Reg; } else { // Result value will be used in a same basic block. Don't export it or // perform any explicit register copies. // We'll replace the actuall call node shortly. gc_result will grab // this value. - setValue(ISP.getInstruction(), ReturnValue); + setValue(&I, ReturnValue); } } else { // The token value is never used from here on, just generate a poison value - setValue(ISP.getInstruction(), DAG.getIntPtrConstant(-1, getCurSDLoc())); + setValue(&I, DAG.getIntPtrConstant(-1, getCurSDLoc())); } } diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index d8da6431bff169..241357be53941e 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -669,6 +669,7 @@ void TargetPassConfig::addIRPasses() { // Run loop strength reduction before anything else. 
if (!DisableLSR) { + addPass(createCanonicalizeFreezeInLoopsPass()); addPass(createLoopStrengthReducePass()); if (PrintLSR) addPass(createPrintFunctionPass(dbgs(), diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 72da461ddcb86d..68edb6bad9396b 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -1858,9 +1858,34 @@ static void writeDISubrange(raw_ostream &Out, const DISubrange *N, if (auto *CE = N->getCount().dyn_cast()) Printer.printInt("count", CE->getSExtValue(), /* ShouldSkipZero */ false); else - Printer.printMetadata("count", N->getCount().dyn_cast(), - /*ShouldSkipNull */ false); - Printer.printInt("lowerBound", N->getLowerBound()); + Printer.printMetadata("count", N->getCount().dyn_cast(), + /*ShouldSkipNull */ true); + + // A lowerBound of constant 0 should not be skipped, since it is different + // from an unspecified lower bound (= nullptr). + auto *LBound = N->getRawLowerBound(); + if (auto *LE = dyn_cast_or_null(LBound)) { + auto *LV = cast(LE->getValue()); + Printer.printInt("lowerBound", LV->getSExtValue(), + /* ShouldSkipZero */ false); + } else + Printer.printMetadata("lowerBound", LBound, /*ShouldSkipNull */ true); + + auto *UBound = N->getRawUpperBound(); + if (auto *UE = dyn_cast_or_null(UBound)) { + auto *UV = cast(UE->getValue()); + Printer.printInt("upperBound", UV->getSExtValue(), + /* ShouldSkipZero */ false); + } else + Printer.printMetadata("upperBound", UBound, /*ShouldSkipNull */ true); + + auto *Stride = N->getRawStride(); + if (auto *SE = dyn_cast_or_null(Stride)) { + auto *SV = cast(SE->getValue()); + Printer.printInt("stride", SV->getSExtValue(), /* ShouldSkipZero */ false); + } else + Printer.printMetadata("stride", Stride, /*ShouldSkipNull */ true); + Out << ")"; } diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp index 122cfe5d5fcaba..191668dacc1886 100644 --- a/llvm/lib/IR/Attributes.cpp +++ b/llvm/lib/IR/Attributes.cpp @@ -1174,6 +1174,17 @@ AttributeList 
AttributeList::get(LLVMContext &C, unsigned Index, return get(C, Attrs); } +AttributeList AttributeList::get(LLVMContext &C, unsigned Index, + ArrayRef Kinds, + ArrayRef Values) { + assert(Kinds.size() == Values.size() && "Mismatched attribute values."); + SmallVector, 8> Attrs; + auto VI = Values.begin(); + for (const auto K : Kinds) + Attrs.emplace_back(Index, Attribute::get(C, K, *VI++)); + return get(C, Attrs); +} + AttributeList AttributeList::get(LLVMContext &C, unsigned Index, ArrayRef Kinds) { SmallVector, 8> Attrs; diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp index 95841be534777a..45cbbb3a60370b 100644 --- a/llvm/lib/IR/DIBuilder.cpp +++ b/llvm/lib/IR/DIBuilder.cpp @@ -625,11 +625,22 @@ DITypeRefArray DIBuilder::getOrCreateTypeArray(ArrayRef Elements) { } DISubrange *DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Count) { - return DISubrange::get(VMContext, Count, Lo); + auto *LB = ConstantAsMetadata::get( + ConstantInt::getSigned(Type::getInt64Ty(VMContext), Lo)); + auto *CountNode = ConstantAsMetadata::get( + ConstantInt::getSigned(Type::getInt64Ty(VMContext), Count)); + return DISubrange::get(VMContext, CountNode, LB, nullptr, nullptr); } DISubrange *DIBuilder::getOrCreateSubrange(int64_t Lo, Metadata *CountNode) { - return DISubrange::get(VMContext, CountNode, Lo); + auto *LB = ConstantAsMetadata::get( + ConstantInt::getSigned(Type::getInt64Ty(VMContext), Lo)); + return DISubrange::get(VMContext, CountNode, LB, nullptr, nullptr); +} + +DISubrange *DIBuilder::getOrCreateSubrange(Metadata *CountNode, Metadata *LB, + Metadata *UB, Metadata *Stride) { + return DISubrange::get(VMContext, CountNode, LB, UB, Stride); } static void checkGlobalVariableScope(DIScope *Context) { diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp index f1e946c1902e6a..ea90d6842c3176 100644 --- a/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/llvm/lib/IR/DebugInfoMetadata.cpp @@ -336,15 +336,103 @@ DISubrange 
*DISubrange::getImpl(LLVMContext &Context, int64_t Count, int64_t Lo, StorageType Storage, bool ShouldCreate) { auto *CountNode = ConstantAsMetadata::get( ConstantInt::getSigned(Type::getInt64Ty(Context), Count)); - return getImpl(Context, CountNode, Lo, Storage, ShouldCreate); + auto *LB = ConstantAsMetadata::get( + ConstantInt::getSigned(Type::getInt64Ty(Context), Lo)); + return getImpl(Context, CountNode, LB, nullptr, nullptr, Storage, + ShouldCreate); } DISubrange *DISubrange::getImpl(LLVMContext &Context, Metadata *CountNode, int64_t Lo, StorageType Storage, bool ShouldCreate) { - DEFINE_GETIMPL_LOOKUP(DISubrange, (CountNode, Lo)); - Metadata *Ops[] = { CountNode }; - DEFINE_GETIMPL_STORE(DISubrange, (CountNode, Lo), Ops); + auto *LB = ConstantAsMetadata::get( + ConstantInt::getSigned(Type::getInt64Ty(Context), Lo)); + return getImpl(Context, CountNode, LB, nullptr, nullptr, Storage, + ShouldCreate); +} + +DISubrange *DISubrange::getImpl(LLVMContext &Context, Metadata *CountNode, + Metadata *LB, Metadata *UB, Metadata *Stride, + StorageType Storage, bool ShouldCreate) { + DEFINE_GETIMPL_LOOKUP(DISubrange, (CountNode, LB, UB, Stride)); + Metadata *Ops[] = {CountNode, LB, UB, Stride}; + DEFINE_GETIMPL_STORE_NO_CONSTRUCTOR_ARGS(DISubrange, Ops); +} + +DISubrange::CountType DISubrange::getCount() const { + if (!getRawCountNode()) + return CountType(); + + if (auto *MD = dyn_cast(getRawCountNode())) + return CountType(cast(MD->getValue())); + + if (auto *DV = dyn_cast(getRawCountNode())) + return CountType(DV); + + return CountType(); +} + +DISubrange::BoundType DISubrange::getLowerBound() const { + Metadata *LB = getRawLowerBound(); + if (!LB) + return BoundType(); + + assert((isa(LB) || isa(LB) || + isa(LB)) && + "LowerBound must be signed constant or DIVariable or DIExpression"); + + if (auto *MD = dyn_cast(LB)) + return BoundType(cast(MD->getValue())); + + if (auto *MD = dyn_cast(LB)) + return BoundType(MD); + + if (auto *MD = dyn_cast(LB)) + return 
BoundType(MD); + + return BoundType(); +} + +DISubrange::BoundType DISubrange::getUpperBound() const { + Metadata *UB = getRawUpperBound(); + if (!UB) + return BoundType(); + + assert((isa(UB) || isa(UB) || + isa(UB)) && + "UpperBound must be signed constant or DIVariable or DIExpression"); + + if (auto *MD = dyn_cast(UB)) + return BoundType(cast(MD->getValue())); + + if (auto *MD = dyn_cast(UB)) + return BoundType(MD); + + if (auto *MD = dyn_cast(UB)) + return BoundType(MD); + + return BoundType(); +} + +DISubrange::BoundType DISubrange::getStride() const { + Metadata *ST = getRawStride(); + if (!ST) + return BoundType(); + + assert((isa(ST) || isa(ST) || + isa(ST)) && + "Stride must be signed constant or DIVariable or DIExpression"); + + if (auto *MD = dyn_cast(ST)) + return BoundType(cast(MD->getValue())); + + if (auto *MD = dyn_cast(ST)) + return BoundType(MD); + + if (auto *MD = dyn_cast(ST)) + return BoundType(MD); + + return BoundType(); } DIEnumerator *DIEnumerator::getImpl(LLVMContext &Context, APInt Value, diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h index 9912808c53c2d0..1c7d8746d242fd 100644 --- a/llvm/lib/IR/LLVMContextImpl.h +++ b/llvm/lib/IR/LLVMContextImpl.h @@ -323,32 +323,46 @@ template <> struct MDNodeKeyImpl : MDNodeOpsKey { template <> struct MDNodeKeyImpl { Metadata *CountNode; - int64_t LowerBound; - - MDNodeKeyImpl(Metadata *CountNode, int64_t LowerBound) - : CountNode(CountNode), LowerBound(LowerBound) {} + Metadata *LowerBound; + Metadata *UpperBound; + Metadata *Stride; + + MDNodeKeyImpl(Metadata *CountNode, Metadata *LowerBound, Metadata *UpperBound, + Metadata *Stride) + : CountNode(CountNode), LowerBound(LowerBound), UpperBound(UpperBound), + Stride(Stride) {} MDNodeKeyImpl(const DISubrange *N) - : CountNode(N->getRawCountNode()), - LowerBound(N->getLowerBound()) {} + : CountNode(N->getRawCountNode()), LowerBound(N->getRawLowerBound()), + UpperBound(N->getRawUpperBound()), Stride(N->getRawStride()) {} bool 
isKeyOf(const DISubrange *RHS) const { - if (LowerBound != RHS->getLowerBound()) - return false; - - if (auto *RHSCount = RHS->getCount().dyn_cast()) - if (auto *MD = dyn_cast(CountNode)) - if (RHSCount->getSExtValue() == - cast(MD->getValue())->getSExtValue()) + auto BoundsEqual = [=](Metadata *Node1, Metadata *Node2) -> bool { + if (Node1 == Node2) + return true; + + ConstantAsMetadata *MD1 = dyn_cast_or_null(Node1); + ConstantAsMetadata *MD2 = dyn_cast_or_null(Node2); + if (MD1 && MD2) { + ConstantInt *CV1 = cast(MD1->getValue()); + ConstantInt *CV2 = cast(MD2->getValue()); + if (CV1->getSExtValue() == CV2->getSExtValue()) return true; + } + return false; + }; - return CountNode == RHS->getRawCountNode(); + return BoundsEqual(CountNode, RHS->getRawCountNode()) && + BoundsEqual(LowerBound, RHS->getRawLowerBound()) && + BoundsEqual(UpperBound, RHS->getRawUpperBound()) && + BoundsEqual(Stride, RHS->getRawStride()); } unsigned getHashValue() const { - if (auto *MD = dyn_cast(CountNode)) - return hash_combine(cast(MD->getValue())->getSExtValue(), - LowerBound); - return hash_combine(CountNode, LowerBound); + if (CountNode) + if (auto *MD = dyn_cast(CountNode)) + return hash_combine(cast(MD->getValue())->getSExtValue(), + LowerBound, UpperBound, Stride); + return hash_combine(CountNode, LowerBound, UpperBound, Stride); } }; diff --git a/llvm/lib/IR/SafepointIRVerifier.cpp b/llvm/lib/IR/SafepointIRVerifier.cpp index c9aa85bb4c96e8..01c01322615958 100644 --- a/llvm/lib/IR/SafepointIRVerifier.cpp +++ b/llvm/lib/IR/SafepointIRVerifier.cpp @@ -783,7 +783,7 @@ void GCPtrTracker::transferBlock(const BasicBlock *BB, BasicBlockState &BBS, void GCPtrTracker::transferInstruction(const Instruction &I, bool &Cleared, AvailableValueSet &Available) { - if (isStatepoint(I)) { + if (isa(I)) { Cleared = true; Available.clear(); } else if (containsGCPtrType(I.getType())) diff --git a/llvm/lib/IR/Statepoint.cpp b/llvm/lib/IR/Statepoint.cpp index fce89b42e9bf67..53b0d1e0aa3593 100644 --- 
a/llvm/lib/IR/Statepoint.cpp +++ b/llvm/lib/IR/Statepoint.cpp @@ -18,15 +18,11 @@ using namespace llvm; bool llvm::isStatepoint(const CallBase *Call) { - if (auto *F = Call->getCalledFunction()) - return F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint; - return false; + return isa(Call); } bool llvm::isStatepoint(const Value *V) { - if (auto *Call = dyn_cast(V)) - return isStatepoint(Call); - return false; + return isa(V); } bool llvm::isStatepoint(const Value &V) { diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index f4680fffa8582e..388fc72417adea 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -893,12 +893,30 @@ void Verifier::visitDIScope(const DIScope &N) { void Verifier::visitDISubrange(const DISubrange &N) { AssertDI(N.getTag() == dwarf::DW_TAG_subrange_type, "invalid tag", &N); + AssertDI(N.getRawCountNode() || N.getRawUpperBound(), + "Subrange must contain count or upperBound", &N); + AssertDI(!N.getRawCountNode() || !N.getRawUpperBound(), + "Subrange can have any one of count or upperBound", &N); + AssertDI(!N.getRawCountNode() || N.getCount(), + "Count must either be a signed constant or a DIVariable", &N); auto Count = N.getCount(); - AssertDI(Count, "Count must either be a signed constant or a DIVariable", - &N); - AssertDI(!Count.is() || - Count.get()->getSExtValue() >= -1, + AssertDI(!Count || !Count.is() || + Count.get()->getSExtValue() >= -1, "invalid subrange count", &N); + auto *LBound = N.getRawLowerBound(); + AssertDI(!LBound || isa(LBound) || + isa(LBound) || isa(LBound), + "LowerBound must be signed constant or DIVariable or DIExpression", + &N); + auto *UBound = N.getRawUpperBound(); + AssertDI(!UBound || isa(UBound) || + isa(UBound) || isa(UBound), + "UpperBound must be signed constant or DIVariable or DIExpression", + &N); + auto *Stride = N.getRawStride(); + AssertDI(!Stride || isa(Stride) || + isa(Stride) || isa(Stride), + "Stride must be signed constant or DIVariable or 
DIExpression", &N); } void Verifier::visitDIEnumerator(const DIEnumerator &N) { @@ -4702,14 +4720,14 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { LandingPad->getParent()); Assert(InvokeBB->getTerminator(), "safepoint block should be well formed", InvokeBB); - Assert(isStatepoint(InvokeBB->getTerminator()), + Assert(isa(InvokeBB->getTerminator()), "gc relocate should be linked to a statepoint", InvokeBB); } else { // In all other cases relocate should be tied to the statepoint directly. // This covers relocates on a normal return path of invoke statepoint and // relocates of a call statepoint. auto Token = Call.getArgOperand(0); - Assert(isa(Token) && isStatepoint(cast(Token)), + Assert(isa(Token), "gc relocate is incorrectly tied to the statepoint", Call, Token); } diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp index e60ddf142ac3d1..2515695095a1c0 100644 --- a/llvm/lib/Object/ELF.cpp +++ b/llvm/lib/Object/ELF.cpp @@ -145,6 +145,13 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, break; } break; + case ELF::EM_VE: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/VE.def" + default: + break; + } + break; default: break; } diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index 78093491704be1..2b7bad674fa46c 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -218,6 +218,8 @@ template class ELFState { void assignSectionAddress(Elf_Shdr &SHeader, ELFYAML::Section *YAMLSec); + DenseMap buildSectionHeaderReorderMap(); + BumpPtrAllocator StringAlloc; uint64_t alignToOffset(ContiguousBlobAccumulator &CBA, uint64_t Align, llvm::Optional Offset); @@ -318,12 +320,29 @@ void ELFState::writeELFHeader(ContiguousBlobAccumulator &CBA, raw_ostream // other sections to the end of the file. uint64_t SHOff = alignToOffset(CBA, sizeof(typename ELFT::uint), /*Offset=*/None); - Header.e_shoff = - Doc.Header.SHOff ? 
typename ELFT::uint(*Doc.Header.SHOff) : SHOff; - Header.e_shnum = - Doc.Header.SHNum ? (uint16_t)*Doc.Header.SHNum : Doc.getSections().size(); - Header.e_shstrndx = Doc.Header.SHStrNdx ? (uint16_t)*Doc.Header.SHStrNdx - : SN2I.get(".shstrtab"); + + if (Doc.Header.SHOff) + Header.e_shoff = *Doc.Header.SHOff; + else if (Doc.SectionHeaders && Doc.SectionHeaders->Sections.empty()) + Header.e_shoff = 0; + else + Header.e_shoff = SHOff; + + if (Doc.Header.SHNum) + Header.e_shnum = *Doc.Header.SHNum; + else if (!Doc.SectionHeaders) + Header.e_shnum = Doc.getSections().size(); + else if (Doc.SectionHeaders->Sections.empty()) + Header.e_shnum = 0; + else + Header.e_shnum = Doc.SectionHeaders->Sections.size() + /*Null section*/ 1; + + if (Doc.Header.SHStrNdx) + Header.e_shstrndx = *Doc.Header.SHStrNdx; + else if (!Doc.SectionHeaders || !Doc.SectionHeaders->Sections.empty()) + Header.e_shstrndx = SN2I.get(".shstrtab"); + else + Header.e_shstrndx = 0; OS.write((const char *)&Header, sizeof(Header)); } @@ -1447,14 +1466,50 @@ void ELFState::writeFill(ELFYAML::Fill &Fill, Fill.Pattern->writeAsBinary(OS, Fill.Size - Written); } +template +DenseMap ELFState::buildSectionHeaderReorderMap() { + if (!Doc.SectionHeaders || Doc.SectionHeaders->Sections.empty()) + return DenseMap(); + + DenseMap Ret; + size_t SecNdx = 0; + StringSet<> Seen; + for (const ELFYAML::SectionHeader &Hdr : Doc.SectionHeaders->Sections) { + if (!Ret.try_emplace(Hdr.Name, ++SecNdx).second) + reportError("repeated section name: '" + Hdr.Name + + "' in the section header description"); + Seen.insert(Hdr.Name); + } + + for (const ELFYAML::Section *S : Doc.getSections()) { + // Ignore special first SHT_NULL section. 
+ if (S == Doc.getSections().front()) + continue; + if (!Seen.count(S->Name)) + reportError("section '" + S->Name + + "' should be present in the 'Sections' list"); + Seen.erase(S->Name); + } + + for (const auto &It : Seen) + reportError("section header contains undefined section '" + It.getKey() + + "'"); + return Ret; +} + template void ELFState::buildSectionIndex() { + // A YAML description can have an explicit section header declaration that allows + // to change the order of section headers. + DenseMap ReorderMap = buildSectionHeaderReorderMap(); + size_t SecNdx = -1; for (const std::unique_ptr &C : Doc.Chunks) { if (!isa(C.get())) continue; ++SecNdx; - if (!SN2I.addName(C->Name, SecNdx)) + size_t Index = ReorderMap.empty() ? SecNdx : ReorderMap.lookup(C->Name); + if (!SN2I.addName(C->Name, Index)) llvm_unreachable("buildSectionIndex() failed"); DotShStrtab.add(ELFYAML::dropUniqueSuffix(C->Name)); } diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index 445fcbc412ba2b..d3e4d2ee3bd85d 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -221,6 +221,7 @@ void ScalarEnumerationTraits::enumeration( ECase(EM_RISCV); ECase(EM_LANAI); ECase(EM_BPF); + ECase(EM_VE); #undef ECase IO.enumFallback(Value); } @@ -662,6 +663,9 @@ void ScalarEnumerationTraits::enumeration( case ELF::EM_BPF: #include "llvm/BinaryFormat/ELFRelocs/BPF.def" break; + case ELF::EM_VE: +#include "llvm/BinaryFormat/ELFRelocs/VE.def" + break; case ELF::EM_PPC64: #include "llvm/BinaryFormat/ELFRelocs/PowerPC64.def" break; @@ -828,6 +832,16 @@ void ScalarBitSetTraits::bitset( #undef BCase } +void MappingTraits::mapping( + IO &IO, ELFYAML::SectionHeader &SHdr) { + IO.mapRequired("Name", SHdr.Name); +} + +void MappingTraits::mapping( + IO &IO, ELFYAML::SectionHeaderTable &SectionHeader) { + IO.mapRequired("Sections", SectionHeader.Sections); +} + void MappingTraits::mapping(IO &IO, ELFYAML::FileHeader &FileHdr) { IO.mapRequired("Class", 
FileHdr.Class); @@ -1634,6 +1648,7 @@ void MappingTraits::mapping(IO &IO, ELFYAML::Object &Object) { IO.setContext(&Object); IO.mapTag("!ELF", true); IO.mapRequired("FileHeader", Object.Header); + IO.mapOptional("SectionHeaderTable", Object.SectionHeaders); IO.mapOptional("ProgramHeaders", Object.ProgramHeaders); IO.mapOptional("Sections", Object.Chunks); IO.mapOptional("Symbols", Object.Symbols); diff --git a/llvm/lib/ObjectYAML/MachOEmitter.cpp b/llvm/lib/ObjectYAML/MachOEmitter.cpp index 5a38fef508540a..f8661e0c3c3175 100644 --- a/llvm/lib/ObjectYAML/MachOEmitter.cpp +++ b/llvm/lib/ObjectYAML/MachOEmitter.cpp @@ -15,6 +15,8 @@ #include "llvm/ObjectYAML/DWARFEmitter.h" #include "llvm/ObjectYAML/ObjectYAML.h" #include "llvm/ObjectYAML/yaml2obj.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/YAMLTraits.h" #include "llvm/Support/raw_ostream.h" @@ -33,12 +35,12 @@ class MachOWriter { memset(reinterpret_cast(&Header), 0, sizeof(MachO::mach_header_64)); } - void writeMachO(raw_ostream &OS); + Error writeMachO(raw_ostream &OS); private: void writeHeader(raw_ostream &OS); void writeLoadCommands(raw_ostream &OS); - void writeSectionData(raw_ostream &OS); + Error writeSectionData(raw_ostream &OS); void writeRelocations(raw_ostream &OS); void writeLinkEditData(raw_ostream &OS); @@ -66,14 +68,16 @@ class MachOWriter { bool FoundLinkEditSeg = false; }; -void MachOWriter::writeMachO(raw_ostream &OS) { +Error MachOWriter::writeMachO(raw_ostream &OS) { fileStart = OS.tell(); writeHeader(OS); writeLoadCommands(OS); - writeSectionData(OS); + if (Error Err = writeSectionData(OS)) + return Err; writeRelocations(OS); if (!FoundLinkEditSeg) writeLinkEditData(OS); + return Error::success(); } void MachOWriter::writeHeader(raw_ostream &OS) { @@ -261,7 +265,7 @@ void MachOWriter::writeLoadCommands(raw_ostream &OS) { } } -void MachOWriter::writeSectionData(raw_ostream &OS) { +Error 
MachOWriter::writeSectionData(raw_ostream &OS) { for (auto &LC : Obj.LoadCommands) { switch (LC.Data.load_command_data.cmd) { case MachO::LC_SEGMENT: @@ -277,9 +281,10 @@ void MachOWriter::writeSectionData(raw_ostream &OS) { ZeroToOffset(OS, Sec.offset); // Zero Fill any data between the end of the last thing we wrote and the // start of this section. - assert((OS.tell() - fileStart <= Sec.offset || - Sec.offset == (uint32_t)0) && - "Wrote too much data somewhere, section offsets don't line up."); + if (OS.tell() - fileStart > Sec.offset && Sec.offset != (uint32_t)0) + return createStringError( + errc::invalid_argument, + "wrote too much data somewhere, section offsets don't line up"); if (0 == strncmp(&Sec.segname[0], "__DWARF", 16)) { if (0 == strncmp(&Sec.sectname[0], "__debug_str", 16)) { DWARFYAML::EmitDebugStr(OS, Obj.DWARF); @@ -323,6 +328,8 @@ void MachOWriter::writeSectionData(raw_ostream &OS) { break; } } + + return Error::success(); } // The implementation of makeRelocationInfo and makeScatteredRelocationInfo is @@ -528,7 +535,7 @@ class UniversalWriter { UniversalWriter(yaml::YamlObjectFile &ObjectFile) : ObjectFile(ObjectFile), fileStart(0) {} - void writeMachO(raw_ostream &OS); + Error writeMachO(raw_ostream &OS); private: void writeFatHeader(raw_ostream &OS); @@ -540,28 +547,33 @@ class UniversalWriter { uint64_t fileStart; }; -void UniversalWriter::writeMachO(raw_ostream &OS) { +Error UniversalWriter::writeMachO(raw_ostream &OS) { fileStart = OS.tell(); if (ObjectFile.MachO) { MachOWriter Writer(*ObjectFile.MachO); - Writer.writeMachO(OS); - return; + return Writer.writeMachO(OS); } writeFatHeader(OS); writeFatArchs(OS); auto &FatFile = *ObjectFile.FatMachO; - assert(FatFile.FatArchs.size() >= FatFile.Slices.size() && - "Cannot write Slices if not decribed in FatArches"); + if (FatFile.FatArchs.size() < FatFile.Slices.size()) + return createStringError( + errc::invalid_argument, + "cannot write 'Slices' if not described in 'FatArches'"); + for 
(size_t i = 0; i < FatFile.Slices.size(); i++) { ZeroToOffset(OS, FatFile.FatArchs[i].offset); MachOWriter Writer(FatFile.Slices[i]); - Writer.writeMachO(OS); + if (Error Err = Writer.writeMachO(OS)) + return Err; auto SliceEnd = FatFile.FatArchs[i].offset + FatFile.FatArchs[i].size; ZeroToOffset(OS, SliceEnd); } + + return Error::success(); } void UniversalWriter::writeFatHeader(raw_ostream &OS) { @@ -629,9 +641,13 @@ void UniversalWriter::ZeroToOffset(raw_ostream &OS, size_t Offset) { namespace llvm { namespace yaml { -bool yaml2macho(YamlObjectFile &Doc, raw_ostream &Out, ErrorHandler /*EH*/) { +bool yaml2macho(YamlObjectFile &Doc, raw_ostream &Out, ErrorHandler EH) { UniversalWriter Writer(Doc); - Writer.writeMachO(Out); + if (Error Err = Writer.writeMachO(Out)) { + handleAllErrors(std::move(Err), + [&](const ErrorInfoBase &Err) { EH(Err.message()); }); + return false; + } return true; } diff --git a/llvm/lib/Passes/LLVMBuild.txt b/llvm/lib/Passes/LLVMBuild.txt index 438fc5c7c2d461..14586b640849ea 100644 --- a/llvm/lib/Passes/LLVMBuild.txt +++ b/llvm/lib/Passes/LLVMBuild.txt @@ -18,4 +18,4 @@ type = Library name = Passes parent = Libraries -required_libraries = AggressiveInstCombine Analysis CodeGen Core Coroutines IPO InstCombine Scalar Support Target TransformUtils Vectorize Instrumentation +required_libraries = AggressiveInstCombine Analysis MLPolicies CodeGen Core Coroutines IPO InstCombine Scalar Support Target TransformUtils Vectorize Instrumentation diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 014ef836e2c309..0999f7872d12c0 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -40,6 +40,7 @@ #include "llvm/Analysis/LoopCacheAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopNestAnalysis.h" +#include "llvm/Analysis/ML/InlineFeaturesAnalysis.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemorySSA.h" #include 
"llvm/Analysis/ModuleSummaryAnalysis.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 03c4379f2468b5..dd75a418925bcd 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -134,6 +134,7 @@ FUNCTION_ANALYSIS("domfrontier", DominanceFrontierAnalysis()) FUNCTION_ANALYSIS("loops", LoopAnalysis()) FUNCTION_ANALYSIS("lazy-value-info", LazyValueAnalysis()) FUNCTION_ANALYSIS("da", DependenceAnalysis()) +FUNCTION_ANALYSIS("inliner-features", InlineFeaturesAnalysis()) FUNCTION_ANALYSIS("memdep", MemoryDependenceAnalysis()) FUNCTION_ANALYSIS("memoryssa", MemorySSAAnalysis()) FUNCTION_ANALYSIS("phi-values", PhiValuesAnalysis()) diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index aa7e79652af95a..25612b7e8f232b 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -1009,7 +1009,7 @@ tokenizeWindowsCommandLineImpl(StringRef Src, StringSaver &Saver, } } - if (!Token.empty()) + if (State == UNQUOTED) AddToken(Saver.save(Token.str())); } diff --git a/llvm/lib/Support/FileCheck.cpp b/llvm/lib/Support/FileCheck.cpp index 300eea865f91b3..a1a37c972b8cb8 100644 --- a/llvm/lib/Support/FileCheck.cpp +++ b/llvm/lib/Support/FileCheck.cpp @@ -17,6 +17,7 @@ #include "FileCheckImpl.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/Twine.h" +#include "llvm/Support/CheckedArithmetic.h" #include "llvm/Support/FormatVariadic.h" #include #include @@ -31,6 +32,8 @@ StringRef ExpressionFormat::toString() const { return StringRef(""); case Kind::Unsigned: return StringRef("%u"); + case Kind::Signed: + return StringRef("%d"); case Kind::HexUpper: return StringRef("%X"); case Kind::HexLower: @@ -43,6 +46,8 @@ Expected ExpressionFormat::getWildcardRegex() const { switch (Value) { case Kind::Unsigned: return StringRef("[0-9]+"); + case Kind::Signed: + return StringRef("-?[0-9]+"); case Kind::HexUpper: return StringRef("[0-9A-F]+"); case Kind::HexLower: @@ -54,43 
+59,188 @@ Expected ExpressionFormat::getWildcardRegex() const { } Expected -ExpressionFormat::getMatchingString(uint64_t IntegerValue) const { +ExpressionFormat::getMatchingString(ExpressionValue IntegerValue) const { + if (Value == Kind::Signed) { + Expected SignedValue = IntegerValue.getSignedValue(); + if (!SignedValue) + return SignedValue.takeError(); + return itostr(*SignedValue); + } + + Expected UnsignedValue = IntegerValue.getUnsignedValue(); + if (!UnsignedValue) + return UnsignedValue.takeError(); switch (Value) { case Kind::Unsigned: - return utostr(IntegerValue); + return utostr(*UnsignedValue); case Kind::HexUpper: - return utohexstr(IntegerValue, /*LowerCase=*/false); + return utohexstr(*UnsignedValue, /*LowerCase=*/false); case Kind::HexLower: - return utohexstr(IntegerValue, /*LowerCase=*/true); + return utohexstr(*UnsignedValue, /*LowerCase=*/true); default: return createStringError(std::errc::invalid_argument, "trying to match value with invalid format"); } } -Expected +Expected ExpressionFormat::valueFromStringRepr(StringRef StrVal, const SourceMgr &SM) const { + bool ValueIsSigned = Value == Kind::Signed; + StringRef OverflowErrorStr = "unable to represent numeric value"; + if (ValueIsSigned) { + int64_t SignedValue; + + if (StrVal.getAsInteger(10, SignedValue)) + return ErrorDiagnostic::get(SM, StrVal, OverflowErrorStr); + + return ExpressionValue(SignedValue); + } + bool Hex = Value == Kind::HexUpper || Value == Kind::HexLower; - uint64_t IntegerValue; - if (StrVal.getAsInteger(Hex ? 16 : 10, IntegerValue)) - return ErrorDiagnostic::get(SM, StrVal, - "unable to represent numeric value"); + uint64_t UnsignedValue; + if (StrVal.getAsInteger(Hex ? 
16 : 10, UnsignedValue)) + return ErrorDiagnostic::get(SM, StrVal, OverflowErrorStr); - return IntegerValue; + return ExpressionValue(UnsignedValue); } -Expected NumericVariableUse::eval() const { - Optional Value = Variable->getValue(); +static int64_t getAsSigned(uint64_t UnsignedValue) { + // Use memcpy to reinterpret the bitpattern in Value since casting to + // signed is implementation-defined if the unsigned value is too big to be + // represented in the signed type and using an union violates type aliasing + // rules. + int64_t SignedValue; + memcpy(&SignedValue, &UnsignedValue, sizeof(SignedValue)); + return SignedValue; +} + +Expected ExpressionValue::getSignedValue() const { + if (Negative) + return getAsSigned(Value); + + if (Value > (uint64_t)std::numeric_limits::max()) + return make_error(); + + // Value is in the representable range of int64_t so we can use cast. + return static_cast(Value); +} + +Expected ExpressionValue::getUnsignedValue() const { + if (Negative) + return make_error(); + + return Value; +} + +ExpressionValue ExpressionValue::getAbsolute() const { + if (!Negative) + return *this; + + int64_t SignedValue = getAsSigned(Value); + int64_t MaxInt64 = std::numeric_limits::max(); + // Absolute value can be represented as int64_t. + if (SignedValue >= -MaxInt64) + return ExpressionValue(-getAsSigned(Value)); + + // -X == -(max int64_t + Rem), negate each component independently. 
+ SignedValue += MaxInt64; + uint64_t RemainingValueAbsolute = -SignedValue; + return ExpressionValue(MaxInt64 + RemainingValueAbsolute); +} + +Expected llvm::operator+(const ExpressionValue &LeftOperand, + const ExpressionValue &RightOperand) { + if (LeftOperand.isNegative() && RightOperand.isNegative()) { + int64_t LeftValue = cantFail(LeftOperand.getSignedValue()); + int64_t RightValue = cantFail(RightOperand.getSignedValue()); + Optional Result = checkedAdd(LeftValue, RightValue); + if (!Result) + return make_error(); + + return ExpressionValue(*Result); + } + + // (-A) + B == B - A. + if (LeftOperand.isNegative()) + return RightOperand - LeftOperand.getAbsolute(); + + // A + (-B) == A - B. + if (RightOperand.isNegative()) + return LeftOperand - RightOperand.getAbsolute(); + + // Both values are positive at this point. + uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue()); + uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); + Optional Result = + checkedAddUnsigned(LeftValue, RightValue); + if (!Result) + return make_error(); + + return ExpressionValue(*Result); +} + +Expected llvm::operator-(const ExpressionValue &LeftOperand, + const ExpressionValue &RightOperand) { + // Result will be negative and thus might underflow. + if (LeftOperand.isNegative() && !RightOperand.isNegative()) { + int64_t LeftValue = cantFail(LeftOperand.getSignedValue()); + uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); + // Result <= -1 - (max int64_t) which overflows on 1- and 2-complement. + if (RightValue > (uint64_t)std::numeric_limits::max()) + return make_error(); + Optional Result = + checkedSub(LeftValue, static_cast(RightValue)); + if (!Result) + return make_error(); + + return ExpressionValue(*Result); + } + + // (-A) - (-B) == B - A. + if (LeftOperand.isNegative()) + return RightOperand.getAbsolute() - LeftOperand.getAbsolute(); + + // A - (-B) == A + B. 
+ if (RightOperand.isNegative()) + return LeftOperand + RightOperand.getAbsolute(); + + // Both values are positive at this point. + uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue()); + uint64_t RightValue = cantFail(RightOperand.getUnsignedValue()); + if (LeftValue >= RightValue) + return ExpressionValue(LeftValue - RightValue); + else { + uint64_t AbsoluteDifference = RightValue - LeftValue; + uint64_t MaxInt64 = std::numeric_limits::max(); + // Value might underflow. + if (AbsoluteDifference > MaxInt64) { + AbsoluteDifference -= MaxInt64; + int64_t Result = -MaxInt64; + int64_t MinInt64 = std::numeric_limits::min(); + // Underflow, tested by: + // abs(Result + (max int64_t)) > abs((min int64_t) + (max int64_t)) + if (AbsoluteDifference > static_cast(-(MinInt64 - Result))) + return make_error(); + Result -= static_cast(AbsoluteDifference); + return ExpressionValue(Result); + } + + return ExpressionValue(-static_cast(AbsoluteDifference)); + } +} + +Expected NumericVariableUse::eval() const { + Optional Value = Variable->getValue(); if (Value) return *Value; return make_error(getExpressionStr()); } -Expected BinaryOperation::eval() const { - Expected LeftOp = LeftOperand->eval(); - Expected RightOp = RightOperand->eval(); +Expected BinaryOperation::eval() const { + Expected LeftOp = LeftOperand->eval(); + Expected RightOp = RightOperand->eval(); // Bubble up any error (e.g. undefined variables) in the recursive // evaluation. 
@@ -136,7 +286,8 @@ BinaryOperation::getImplicitFormat(const SourceMgr &SM) const { Expected NumericSubstitution::getResult() const { assert(ExpressionPointer->getAST() != nullptr && "Substituting empty expression"); - Expected EvaluatedValue = ExpressionPointer->getAST()->eval(); + Expected EvaluatedValue = + ExpressionPointer->getAST()->eval(); if (!EvaluatedValue) return EvaluatedValue.takeError(); ExpressionFormat Format = ExpressionPointer->getFormat(); @@ -192,6 +343,7 @@ static char popFront(StringRef &S) { return C; } +char OverflowError::ID = 0; char UndefVarError::ID = 0; char ErrorDiagnostic::ID = 0; char NotFoundError::ID = 0; @@ -295,13 +447,18 @@ Expected> Pattern::parseNumericOperand( } // Otherwise, parse it as a literal. - uint64_t LiteralValue; - StringRef OperandExpr = Expr; + int64_t SignedLiteralValue; + uint64_t UnsignedLiteralValue; + StringRef SaveExpr = Expr; + // Accept both signed and unsigned literal, default to signed literal. if (!Expr.consumeInteger((AO == AllowedOperand::LegacyLiteral) ? 
10 : 0, - LiteralValue)) { - return std::make_unique( - OperandExpr.drop_back(Expr.size()), LiteralValue); - } + UnsignedLiteralValue)) + return std::make_unique(SaveExpr.drop_back(Expr.size()), + UnsignedLiteralValue); + Expr = SaveExpr; + if (AO == AllowedOperand::Any && !Expr.consumeInteger(0, SignedLiteralValue)) + return std::make_unique(SaveExpr.drop_back(Expr.size()), + SignedLiteralValue); return ErrorDiagnostic::get(SM, Expr, "invalid operand format '" + Expr + "'"); @@ -339,14 +496,6 @@ Pattern::parseParenExpr(StringRef &Expr, Optional LineNumber, return SubExprResult; } -static uint64_t add(uint64_t LeftOp, uint64_t RightOp) { - return LeftOp + RightOp; -} - -static uint64_t sub(uint64_t LeftOp, uint64_t RightOp) { - return LeftOp - RightOp; -} - Expected> Pattern::parseBinop(StringRef Expr, StringRef &RemainingExpr, std::unique_ptr LeftOp, @@ -363,10 +512,10 @@ Pattern::parseBinop(StringRef Expr, StringRef &RemainingExpr, binop_eval_t EvalBinop; switch (Operator) { case '+': - EvalBinop = add; + EvalBinop = operator+; break; case '-': - EvalBinop = sub; + EvalBinop = operator-; break; default: return ErrorDiagnostic::get( @@ -415,6 +564,9 @@ Expected> Pattern::parseNumericSubstitutionBlock( case 'u': ExplicitFormat = ExpressionFormat(ExpressionFormat::Kind::Unsigned); break; + case 'd': + ExplicitFormat = ExpressionFormat(ExpressionFormat::Kind::Signed); + break; case 'x': ExplicitFormat = ExpressionFormat(ExpressionFormat::Kind::HexLower); break; @@ -819,7 +971,7 @@ Expected Pattern::match(StringRef Buffer, size_t &MatchLen, if (!Substitutions.empty()) { TmpStr = RegExStr; if (LineNumber) - Context->LineVariable->setValue(*LineNumber); + Context->LineVariable->setValue(ExpressionValue(*LineNumber)); size_t InsertOffset = 0; // Substitute all string variables and expressions whose values are only @@ -828,8 +980,18 @@ Expected Pattern::match(StringRef Buffer, size_t &MatchLen, for (const auto &Substitution : Substitutions) { // Substitute and check for 
failure (e.g. use of undefined variable). Expected Value = Substitution->getResult(); - if (!Value) - return Value.takeError(); + if (!Value) { + // Convert to an ErrorDiagnostic to get location information. This is + // done here rather than PrintNoMatch since now we know which + // substitution block caused the overflow. + Error Err = + handleErrors(Value.takeError(), [&](const OverflowError &E) { + return ErrorDiagnostic::get(SM, Substitution->getFromString(), + "unable to substitute variable or " + "numeric expression: overflow error"); + }); + return std::move(Err); + } // Plop it into the regex at the adjusted offset. TmpStr.insert(TmpStr.begin() + Substitution->getIndex() + InsertOffset, @@ -870,7 +1032,8 @@ Expected Pattern::match(StringRef Buffer, size_t &MatchLen, StringRef MatchedValue = MatchInfo[CaptureParenGroup]; ExpressionFormat Format = DefinedNumericVariable->getImplicitFormat(); - Expected Value = Format.valueFromStringRepr(MatchedValue, SM); + Expected Value = + Format.valueFromStringRepr(MatchedValue, SM); if (!Value) return Value.takeError(); DefinedNumericVariable->setValue(*Value); @@ -914,17 +1077,20 @@ void Pattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer, // variables it uses. if (!MatchedValue) { bool UndefSeen = false; - handleAllErrors(MatchedValue.takeError(), [](const NotFoundError &E) {}, - // Handled in PrintNoMatch(). - [](const ErrorDiagnostic &E) {}, - [&](const UndefVarError &E) { - if (!UndefSeen) { - OS << "uses undefined variable(s):"; - UndefSeen = true; - } - OS << " "; - E.log(OS); - }); + handleAllErrors( + MatchedValue.takeError(), [](const NotFoundError &E) {}, + // Handled in PrintNoMatch(). + [](const ErrorDiagnostic &E) {}, + // Handled in match(). + [](const OverflowError &E) {}, + [&](const UndefVarError &E) { + if (!UndefSeen) { + OS << "uses undefined variable(s):"; + UndefSeen = true; + } + OS << " "; + E.log(OS); + }); } else { // Substitution succeeded. Print substituted value. 
OS << "with \""; @@ -2086,7 +2252,7 @@ Error FileCheckPatternContext::defineCmdlineVariables( // to, since the expression of a command-line variable definition should // only use variables defined earlier on the command-line. If not, this // is an error and we report it. - Expected Value = Expression->getAST()->eval(); + Expected Value = Expression->getAST()->eval(); if (!Value) { Errs = joinErrors(std::move(Errs), Value.takeError()); continue; diff --git a/llvm/lib/Support/FileCheckImpl.h b/llvm/lib/Support/FileCheckImpl.h index f4f2fc21a2084b..068de3da1c692e 100644 --- a/llvm/lib/Support/FileCheckImpl.h +++ b/llvm/lib/Support/FileCheckImpl.h @@ -31,6 +31,8 @@ namespace llvm { // Numeric substitution handling code. //===----------------------------------------------------------------------===// +class ExpressionValue; + /// Type representing the format an expression value should be textualized into /// for matching. Used to represent both explicit format specifiers as well as /// implicit format from using numeric variables. @@ -41,6 +43,8 @@ struct ExpressionFormat { NoFormat, /// Value is an unsigned integer and should be printed as a decimal number. Unsigned, + /// Value is a signed integer and should be printed as a decimal number. + Signed, /// Value should be printed as an uppercase hex number. HexUpper, /// Value should be printed as a lowercase hex number. @@ -80,17 +84,64 @@ struct ExpressionFormat { Expected getWildcardRegex() const; /// \returns the string representation of \p Value in the format represented - /// by this instance, or an error if the format is NoFormat. - Expected getMatchingString(uint64_t Value) const; + /// by this instance, or an error if conversion to this format failed or the + /// format is NoFormat. 
+ Expected getMatchingString(ExpressionValue Value) const; /// \returns the value corresponding to string representation \p StrVal /// according to the matching format represented by this instance or an error /// with diagnostic against \p SM if \p StrVal does not correspond to a valid /// and representable value. - Expected valueFromStringRepr(StringRef StrVal, - const SourceMgr &SM) const; + Expected valueFromStringRepr(StringRef StrVal, + const SourceMgr &SM) const; }; +/// Class to represent an overflow error that might result when manipulating a +/// value. +class OverflowError : public ErrorInfo { +public: + static char ID; + + std::error_code convertToErrorCode() const override { + return std::make_error_code(std::errc::value_too_large); + } + + void log(raw_ostream &OS) const override { OS << "overflow error"; } +}; + +/// Class representing a numeric value. +class ExpressionValue { +private: + uint64_t Value; + bool Negative; + +public: + template + explicit ExpressionValue(T Val) : Value(Val), Negative(Val < 0) {} + + /// Returns true if value is signed and negative, false otherwise. + bool isNegative() const { return Negative; } + + /// \returns the value as a signed integer or an error if the value is out of + /// range. + Expected getSignedValue() const; + + /// \returns the value as an unsigned integer or an error if the value is out + /// of range. + Expected getUnsignedValue() const; + + /// \returns an unsigned ExpressionValue instance whose value is the absolute + /// value to this object's value. + ExpressionValue getAbsolute() const; +}; + +/// Performs operation and \returns its result or an error in case of failure, +/// such as if an overflow occurs. +Expected operator+(const ExpressionValue &Lhs, + const ExpressionValue &Rhs); +Expected operator-(const ExpressionValue &Lhs, + const ExpressionValue &Rhs); + /// Base class representing the AST of a given expression. 
class ExpressionAST { private: @@ -105,7 +156,7 @@ class ExpressionAST { /// Evaluates and \returns the value of the expression represented by this /// AST or an error if evaluation fails. - virtual Expected eval() const = 0; + virtual Expected eval() const = 0; /// \returns either the implicit format of this AST, a diagnostic against /// \p SM if implicit formats of the AST's components conflict, or NoFormat @@ -121,16 +172,15 @@ class ExpressionAST { class ExpressionLiteral : public ExpressionAST { private: /// Actual value of the literal. - uint64_t Value; + ExpressionValue Value; public: - /// Constructs a literal with the specified value parsed from - /// \p ExpressionStr. - ExpressionLiteral(StringRef ExpressionStr, uint64_t Val) + template + explicit ExpressionLiteral(StringRef ExpressionStr, T Val) : ExpressionAST(ExpressionStr), Value(Val) {} /// \returns the literal's value. - Expected eval() const override { return Value; } + Expected eval() const override { return Value; } }; /// Class to represent an undefined variable error, which quotes that @@ -190,7 +240,7 @@ class NumericVariable { ExpressionFormat ImplicitFormat; /// Value of numeric variable, if defined, or None otherwise. - Optional Value; + Optional Value; /// Line number where this variable is defined, or None if defined before /// input is parsed. Used to determine whether a variable is defined on the @@ -213,10 +263,10 @@ class NumericVariable { ExpressionFormat getImplicitFormat() const { return ImplicitFormat; } /// \returns this variable's value. - Optional getValue() const { return Value; } + Optional getValue() const { return Value; } /// Sets value of this numeric variable to \p NewValue. - void setValue(uint64_t NewValue) { Value = NewValue; } + void setValue(ExpressionValue NewValue) { Value = NewValue; } /// Clears value of this numeric variable, regardless of whether it is /// currently defined or not. 
@@ -238,7 +288,7 @@ class NumericVariableUse : public ExpressionAST { NumericVariableUse(StringRef Name, NumericVariable *Variable) : ExpressionAST(Name), Variable(Variable) {} /// \returns the value of the variable referenced by this instance. - Expected eval() const override; + Expected eval() const override; /// \returns implicit format of this numeric variable. Expected @@ -248,7 +298,8 @@ class NumericVariableUse : public ExpressionAST { }; /// Type of functions evaluating a given binary operation. -using binop_eval_t = uint64_t (*)(uint64_t, uint64_t); +using binop_eval_t = Expected (*)(const ExpressionValue &, + const ExpressionValue &); /// Class representing a single binary operation in the AST of an expression. class BinaryOperation : public ExpressionAST { @@ -275,7 +326,7 @@ class BinaryOperation : public ExpressionAST { /// using EvalBinop on the result of recursively evaluating the operands. /// \returns the expression value or an error if an undefined numeric /// variable is used in one of the operands. - Expected eval() const override; + Expected eval() const override; /// \returns the implicit format of this AST, if any, a diagnostic against /// \p SM if the implicit formats of the AST's components conflict, or no diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index d9b3cac5e8dc0e..da68464c4a3d94 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -142,6 +142,7 @@ StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) { .Case("POWER8E", "pwr8") .Case("POWER8NVL", "pwr8") .Case("POWER9", "pwr9") + .Case("POWER10", "pwr10") // FIXME: If we get a simulator or machine with the capabilities of // mcpu=future, we should revisit this and add the name reported by the // simulator/machine. 
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 381bf86c7d62ba..b9034862c2707e 100644 --- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -80,6 +80,9 @@ class AArch64ExpandPseudo : public MachineFunctionPass { bool expandSetTagLoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI); + bool expandSVESpillFill(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, unsigned Opc, + unsigned N); }; } // end anonymous namespace @@ -595,6 +598,28 @@ bool AArch64ExpandPseudo::expandSetTagLoop( return true; } +bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned Opc, unsigned N) { + const TargetRegisterInfo *TRI = + MBB.getParent()->getSubtarget().getRegisterInfo(); + MachineInstr &MI = *MBBI; + for (unsigned Offset = 0; Offset < N; ++Offset) { + int ImmOffset = MI.getOperand(2).getImm() + Offset; + bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false; + assert(ImmOffset >= -256 && ImmOffset < 256 && + "Immediate spill offset out of range"); + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)) + .addReg( + TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset), + Opc == AArch64::LDR_ZXI ? RegState::Define : 0) + .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill)) + .addImm(ImmOffset); + } + MI.eraseFromParent(); + return true; +} + /// If MBBI references a pseudo instruction that should be expanded here, /// do the expansion and return true. Otherwise return false. 
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, @@ -970,6 +995,18 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, report_fatal_error( "Non-writeback variants of STGloop / STZGloop should not " "survive past PrologEpilogInserter."); + case AArch64::STR_ZZZZXI: + return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4); + case AArch64::STR_ZZZXI: + return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3); + case AArch64::STR_ZZXI: + return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2); + case AArch64::LDR_ZZZZXI: + return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4); + case AArch64::LDR_ZZZXI: + return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3); + case AArch64::LDR_ZZXI: + return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2); } return false; } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 564fd33ca596aa..fd07c32e5496f3 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2278,6 +2278,27 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, MinOffset = -256; MaxOffset = 255; break; + case AArch64::STR_ZZZZXI: + case AArch64::LDR_ZZZZXI: + Scale = TypeSize::Scalable(16); + Width = SVEMaxBytesPerVector * 4; + MinOffset = -256; + MaxOffset = 252; + break; + case AArch64::STR_ZZZXI: + case AArch64::LDR_ZZZXI: + Scale = TypeSize::Scalable(16); + Width = SVEMaxBytesPerVector * 3; + MinOffset = -256; + MaxOffset = 253; + break; + case AArch64::STR_ZZXI: + case AArch64::LDR_ZZXI: + Scale = TypeSize::Scalable(16); + Width = SVEMaxBytesPerVector * 2; + MinOffset = -256; + MaxOffset = 254; + break; case AArch64::LDR_PXI: case AArch64::STR_PXI: Scale = TypeSize::Scalable(2); @@ -2984,6 +3005,7 @@ void AArch64InstrInfo::storeRegToStackSlot( MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); unsigned Opc = 0; bool Offset = true; + unsigned StackID = TargetStackID::Default; switch 
(TRI->getSpillSize(*RC)) { case 1: if (AArch64::FPR8RegClass.hasSubClassEq(RC)) @@ -2992,6 +3014,11 @@ void AArch64InstrInfo::storeRegToStackSlot( case 2: if (AArch64::FPR16RegClass.hasSubClassEq(RC)) Opc = AArch64::STRHui; + else if (AArch64::PPRRegClass.hasSubClassEq(RC)) { + assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); + Opc = AArch64::STR_PXI; + StackID = TargetStackID::SVEVector; + } break; case 4: if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { @@ -3031,6 +3058,10 @@ void AArch64InstrInfo::storeRegToStackSlot( get(AArch64::STPXi), SrcReg, isKill, AArch64::sube64, AArch64::subo64, FI, MMO); return; + } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) { + assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); + Opc = AArch64::STR_ZXI; + StackID = TargetStackID::SVEVector; } break; case 24: @@ -3049,6 +3080,10 @@ void AArch64InstrInfo::storeRegToStackSlot( assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Twov2d; Offset = false; + } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) { + assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); + Opc = AArch64::STR_ZZXI; + StackID = TargetStackID::SVEVector; } break; case 48: @@ -3056,6 +3091,10 @@ void AArch64InstrInfo::storeRegToStackSlot( assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Threev2d; Offset = false; + } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) { + assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); + Opc = AArch64::STR_ZZZXI; + StackID = TargetStackID::SVEVector; } break; case 64: @@ -3063,19 +3102,13 @@ void AArch64InstrInfo::storeRegToStackSlot( assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Fourv2d; Offset = false; + } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) { + assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); + Opc = AArch64::STR_ZZZZXI; + StackID = 
TargetStackID::SVEVector; } break; } - unsigned StackID = TargetStackID::Default; - if (AArch64::PPRRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); - Opc = AArch64::STR_PXI; - StackID = TargetStackID::SVEVector; - } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasSVE() && "Unexpected register store without SVE"); - Opc = AArch64::STR_ZXI; - StackID = TargetStackID::SVEVector; - } assert(Opc && "Unknown register class"); MFI.setStackID(FI, StackID); @@ -3126,6 +3159,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( unsigned Opc = 0; bool Offset = true; + unsigned StackID = TargetStackID::Default; switch (TRI->getSpillSize(*RC)) { case 1: if (AArch64::FPR8RegClass.hasSubClassEq(RC)) @@ -3134,6 +3168,11 @@ void AArch64InstrInfo::loadRegFromStackSlot( case 2: if (AArch64::FPR16RegClass.hasSubClassEq(RC)) Opc = AArch64::LDRHui; + else if (AArch64::PPRRegClass.hasSubClassEq(RC)) { + assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); + Opc = AArch64::LDR_PXI; + StackID = TargetStackID::SVEVector; + } break; case 4: if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { @@ -3173,6 +3212,10 @@ void AArch64InstrInfo::loadRegFromStackSlot( get(AArch64::LDPXi), DestReg, AArch64::sube64, AArch64::subo64, FI, MMO); return; + } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) { + assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); + Opc = AArch64::LDR_ZXI; + StackID = TargetStackID::SVEVector; } break; case 24: @@ -3191,6 +3234,10 @@ void AArch64InstrInfo::loadRegFromStackSlot( assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Twov2d; Offset = false; + } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) { + assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); + Opc = AArch64::LDR_ZZXI; + StackID = TargetStackID::SVEVector; } break; case 48: @@ -3198,6 +3245,10 @@ void AArch64InstrInfo::loadRegFromStackSlot( 
assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Threev2d; Offset = false; + } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) { + assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); + Opc = AArch64::LDR_ZZZXI; + StackID = TargetStackID::SVEVector; } break; case 64: @@ -3205,20 +3256,14 @@ void AArch64InstrInfo::loadRegFromStackSlot( assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Fourv2d; Offset = false; + } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) { + assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); + Opc = AArch64::LDR_ZZZZXI; + StackID = TargetStackID::SVEVector; } break; } - unsigned StackID = TargetStackID::Default; - if (AArch64::PPRRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); - Opc = AArch64::LDR_PXI; - StackID = TargetStackID::SVEVector; - } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) { - assert(Subtarget.hasSVE() && "Unexpected register load without SVE"); - Opc = AArch64::LDR_ZXI; - StackID = TargetStackID::SVEVector; - } assert(Opc && "Unknown register class"); MFI.setStackID(FI, StackID); diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index df82680b1f6db1..54a764337324ce 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1334,6 +1334,20 @@ multiclass sve_prefetch; + // Pseudo instructions representing unpredicated LDR and STR for ZPR2,3,4. + // These get expanded to individual LDR_ZXI/STR_ZXI instructions in + // AArch64ExpandPseudoInsts. 
+ let mayLoad = 1, hasSideEffects = 0 in { + def LDR_ZZXI : Pseudo<(outs ZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + def LDR_ZZZXI : Pseudo<(outs ZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + def LDR_ZZZZXI : Pseudo<(outs ZZZZ_b:$Zd), (ins GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + } + let mayStore = 1, hasSideEffects = 0 in { + def STR_ZZXI : Pseudo<(outs), (ins ZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + def STR_ZZZXI : Pseudo<(outs), (ins ZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + def STR_ZZZZXI : Pseudo<(outs), (ins ZZZZ_b:$Zs, GPR64sp:$sp, simm4s1:$offset),[]>, Sched<[]>; + } + def : Pat<(AArch64ptest (nxv16i1 PPR:$pg), (nxv16i1 PPR:$src)), (PTEST_PP PPR:$pg, PPR:$src)>; def : Pat<(AArch64ptest (nxv8i1 PPR:$pg), (nxv8i1 PPR:$src)), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 81676d63643dfc..fe0462a31064d0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -49,6 +49,22 @@ using namespace llvm; using namespace llvm::AMDGPU; using namespace llvm::AMDGPU::HSAMD; +// We need to tell the runtime some amount ahead of time if we don't know the +// true stack size. Assume a smaller number if this is only due to dynamic / +// non-entry block allocas. +static cl::opt AssumedStackSizeForExternalCall( + "amdgpu-assume-external-call-stack-size", + cl::desc("Assumed stack use of any external call (in bytes)"), + cl::Hidden, + cl::init(16384)); + +static cl::opt AssumedStackSizeForDynamicSizeObjects( + "amdgpu-assume-dynamic-stack-object-size", + cl::desc("Assumed extra stack use if there are any " + "variable sized objects (in bytes)"), + cl::Hidden, + cl::init(4096)); + // This should get the default rounding mode from the kernel. We just set the // default here, but this could change if the OpenCL rounding mode pragmas are // used. 
@@ -637,8 +653,13 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( Info.UsesFlatScratch = false; } - Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects(); Info.PrivateSegmentSize = FrameInfo.getStackSize(); + + // Assume a big number if there are any unknown sized objects. + Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects(); + if (Info.HasDynamicallySizedStack) + Info.PrivateSegmentSize += AssumedStackSizeForDynamicSizeObjects; + if (MFI->isStackRealigned()) Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value(); @@ -907,7 +928,9 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( MaxVGPR = std::max(MaxVGPR, 23); MaxAGPR = std::max(MaxAGPR, 23); - CalleeFrameSize = std::max(CalleeFrameSize, UINT64_C(16384)); + CalleeFrameSize = std::max(CalleeFrameSize, + static_cast(AssumedStackSizeForExternalCall)); + Info.UsesVCC = true; Info.UsesFlatScratch = ST.hasFlatAddressSpace(); Info.HasDynamicallySizedStack = true; diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 806cc482f634eb..4221e3f0537185 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1346,6 +1346,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser { bool validateOpSel(const MCInst &Inst); bool validateVccOperand(unsigned Reg) const; bool validateVOP3Literal(const MCInst &Inst) const; + bool validateMAIAccWrite(const MCInst &Inst); unsigned getConstantBusLimit(unsigned Opcode) const; bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; @@ -3147,6 +3148,30 @@ bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) { return !isSGPR(mc2PseudoReg(Reg), TRI); } +bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) { + + const unsigned Opc = Inst.getOpcode(); + + if (Opc != 
AMDGPU::V_ACCVGPR_WRITE_B32_vi) + return true; + + const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); + assert(Src0Idx != -1); + + const MCOperand &Src0 = Inst.getOperand(Src0Idx); + if (!Src0.isReg()) + return true; + + auto Reg = Src0.getReg(); + const MCRegisterInfo *TRI = getContext().getRegisterInfo(); + if (isSGPR(mc2PseudoReg(Reg), TRI)) { + Error(getLoc(), "source operand must be either a VGPR or an inline constant"); + return false; + } + + return true; +} + bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { const unsigned Opc = Inst.getOpcode(); @@ -3617,6 +3642,9 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, if (!validateSMEMOffset(Inst, Operands)) { return false; } + if (!validateMAIAccWrite(Inst)) { + return false; + } return true; } diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 277e476907d0b2..b15c98c878eb53 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -362,13 +362,6 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address); } while (false); - if (Res && (MaxInstBytesNum - Bytes.size()) == 12 && (!HasLiteral || - !(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3))) { - MaxInstBytesNum = 8; - Bytes = Bytes_.slice(0, MaxInstBytesNum); - eatBytes(Bytes); - } - if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi || MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx10 || diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index ff199388e464b4..ef5c125870d688 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3090,6 +3090,67 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, 
IsThisReturn ? OutVals[0] : SDValue()); } +// This is identical to the default implementation in ExpandDYNAMIC_STACKALLOC, +// except for applying the wave size scale to the increment amount. +SDValue SITargetLowering::lowerDYNAMIC_STACKALLOCImpl( + SDValue Op, SelectionDAG &DAG) const { + const MachineFunction &MF = DAG.getMachineFunction(); + const SIMachineFunctionInfo *Info = MF.getInfo(); + + SDLoc dl(Op); + EVT VT = Op.getValueType(); + SDValue Tmp1 = Op; + SDValue Tmp2 = Op.getValue(1); + SDValue Tmp3 = Op.getOperand(2); + SDValue Chain = Tmp1.getOperand(0); + + Register SPReg = Info->getStackPtrOffsetReg(); + + // Chain the dynamic stack allocation so that it doesn't modify the stack + // pointer when other instructions are using the stack. + Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl); + + SDValue Size = Tmp2.getOperand(1); + SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); + Chain = SP.getValue(1); + unsigned Align = cast(Tmp3)->getZExtValue(); + const GCNSubtarget &ST = MF.getSubtarget(); + const TargetFrameLowering *TFL = ST.getFrameLowering(); + unsigned Opc = + TFL->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp ? + ISD::ADD : ISD::SUB; + + SDValue ScaledSize = DAG.getNode( + ISD::SHL, dl, VT, Size, + DAG.getConstant(ST.getWavefrontSizeLog2(), dl, MVT::i32)); + + unsigned StackAlign = TFL->getStackAlignment(); + Tmp1 = DAG.getNode(Opc, dl, VT, SP, ScaledSize); // Value + if (Align > StackAlign) + Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, + DAG.getConstant(-(uint64_t)Align, dl, VT)); + Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain + Tmp2 = DAG.getCALLSEQ_END( + Chain, DAG.getIntPtrConstant(0, dl, true), + DAG.getIntPtrConstant(0, dl, true), SDValue(), dl); + + return DAG.getMergeValues({Tmp1, Tmp2}, dl); +} + +SDValue SITargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, + SelectionDAG &DAG) const { + // We only handle constant sizes here to allow non-entry block, static sized + // allocas. 
A truly dynamic value is more difficult to support because we + // don't know if the size value is uniform or not. If the size isn't uniform, + // we would need to do a wave reduction to get the maximum size to know how + // much to increment the uniform stack pointer. + SDValue Size = Op.getOperand(1); + if (isa(Size)) + return lowerDYNAMIC_STACKALLOCImpl(Op, DAG); // Use "generic" expansion. + + return AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(Op, DAG); +} + Register SITargetLowering::getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const { Register Reg = StringSwitch(RegName) @@ -4306,6 +4367,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FMINNUM_IEEE: case ISD::FMAXNUM_IEEE: return splitBinaryVectorOp(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: + return LowerDYNAMIC_STACKALLOC(Op, DAG); } return SDValue(); } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 7ef11eba4f9cea..da0260f4ed2d1b 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -337,6 +337,9 @@ class SITargetLowering final : public AMDGPUTargetLowering { SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const override; + SDValue lowerDYNAMIC_STACKALLOCImpl(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + Register getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const override; diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td index a4d11780118f28..428c21c896d502 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -114,6 +114,9 @@ class InstSI ; // Used to inject printing of "_e32" suffix for VI (there are "_e64" variants for VI) def VINTRPDst : VINTRPDstOperand ; -let Uses = [M0, EXEC] in { +let Uses = [MODE, M0, EXEC] in { // 
FIXME: Specify SchedRW for VINTRP instructions. @@ -109,7 +109,7 @@ defm V_INTERP_MOV_F32 : VINTRP_m < [(set f32:$vdst, (int_amdgcn_interp_mov (i32 timm:$vsrc), (i32 timm:$attrchan), (i32 timm:$attr), M0))]>; -} // End Uses = [M0, EXEC] +} // End Uses = [MODE, M0, EXEC] //===----------------------------------------------------------------------===// // Pseudo Instructions diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 984a15a39aa4c9..3e739e51810d33 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -26,6 +26,7 @@ #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" +#include using namespace llvm; @@ -38,6 +39,8 @@ static cl::opt EnableSpillSGPRToVGPR( cl::ReallyHidden, cl::init(true)); +std::array, 16> SIRegisterInfo::RegSplitParts; + SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) : AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour()), ST(ST), SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) { @@ -53,6 +56,30 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) RegPressureIgnoredUnits.set(*MCRegUnitIterator(AMDGPU::M0, this)); for (auto Reg : AMDGPU::VGPR_HI16RegClass) RegPressureIgnoredUnits.set(*MCRegUnitIterator(Reg, this)); + + // HACK: Until this is fully tablegen'd. + static llvm::once_flag InitializeRegSplitPartsFlag; + + static auto InitializeRegSplitPartsOnce = [this]() { + for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) { + unsigned Size = getSubRegIdxSize(Idx); + if (Size & 31) + continue; + std::vector &Vec = RegSplitParts[Size / 32 - 1]; + unsigned Pos = getSubRegIdxOffset(Idx); + if (Pos % Size) + continue; + Pos /= Size; + if (Vec.empty()) { + unsigned MaxNumParts = 1024 / Size; // Maximum register is 1024 bits. 
+ Vec.resize(MaxNumParts); + } + Vec[Pos] = Idx; + } + }; + + + llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce); } void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, @@ -1313,88 +1340,82 @@ StringRef SIRegisterInfo::getRegAsmName(MCRegister Reg) const { const TargetRegisterClass * SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) { - switch (BitWidth) { - case 1: + if (BitWidth == 1) return &AMDGPU::VReg_1RegClass; - case 16: + if (BitWidth <= 16) return &AMDGPU::VGPR_LO16RegClass; - case 32: + if (BitWidth <= 32) return &AMDGPU::VGPR_32RegClass; - case 64: + if (BitWidth <= 64) return &AMDGPU::VReg_64RegClass; - case 96: + if (BitWidth <= 96) return &AMDGPU::VReg_96RegClass; - case 128: + if (BitWidth <= 128) return &AMDGPU::VReg_128RegClass; - case 160: + if (BitWidth <= 160) return &AMDGPU::VReg_160RegClass; - case 192: + if (BitWidth <= 192) return &AMDGPU::VReg_192RegClass; - case 256: + if (BitWidth <= 256) return &AMDGPU::VReg_256RegClass; - case 512: + if (BitWidth <= 512) return &AMDGPU::VReg_512RegClass; - case 1024: + if (BitWidth <= 1024) return &AMDGPU::VReg_1024RegClass; - default: - return nullptr; - } + + return nullptr; } const TargetRegisterClass * SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) { - switch (BitWidth) { - case 16: + if (BitWidth <= 16) return &AMDGPU::AGPR_LO16RegClass; - case 32: + if (BitWidth <= 32) return &AMDGPU::AGPR_32RegClass; - case 64: + if (BitWidth <= 64) return &AMDGPU::AReg_64RegClass; - case 96: + if (BitWidth <= 96) return &AMDGPU::AReg_96RegClass; - case 128: + if (BitWidth <= 128) return &AMDGPU::AReg_128RegClass; - case 160: + if (BitWidth <= 160) return &AMDGPU::AReg_160RegClass; - case 192: + if (BitWidth <= 192) return &AMDGPU::AReg_192RegClass; - case 256: + if (BitWidth <= 256) return &AMDGPU::AReg_256RegClass; - case 512: + if (BitWidth <= 512) return &AMDGPU::AReg_512RegClass; - case 1024: + if (BitWidth <= 1024) return &AMDGPU::AReg_1024RegClass; 
- default: - return nullptr; - } + + return nullptr; } const TargetRegisterClass * SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) { - switch (BitWidth) { - case 16: + if (BitWidth <= 16) return &AMDGPU::SGPR_LO16RegClass; - case 32: + if (BitWidth <= 32) return &AMDGPU::SReg_32RegClass; - case 64: + if (BitWidth <= 64) return &AMDGPU::SReg_64RegClass; - case 96: + if (BitWidth <= 96) return &AMDGPU::SGPR_96RegClass; - case 128: + if (BitWidth <= 128) return &AMDGPU::SGPR_128RegClass; - case 160: + if (BitWidth <= 160) return &AMDGPU::SGPR_160RegClass; - case 192: + if (BitWidth <= 192) return &AMDGPU::SGPR_192RegClass; - case 256: + if (BitWidth <= 256) return &AMDGPU::SGPR_256RegClass; - case 512: + if (BitWidth <= 512) return &AMDGPU::SGPR_512RegClass; - case 1024: + if (BitWidth <= 1024) return &AMDGPU::SGPR_1024RegClass; - default: - return nullptr; - } + + return nullptr; } // FIXME: This is very slow. It might be worth creating a map from physreg to @@ -1579,65 +1600,14 @@ ArrayRef SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC const unsigned RegBitWidth = AMDGPU::getRegBitWidth(*RC->MC); assert(RegBitWidth >= 32 && RegBitWidth <= 1024); - const unsigned EltBitWidth = EltSize * 8; - assert(EltBitWidth >= 32 && EltBitWidth < 1024 && isPowerOf2_32(EltBitWidth)); - const unsigned LogEltBitWidth = Log2_32(EltBitWidth); - - assert(RegBitWidth % EltBitWidth == 0); - - if (RegBitWidth == EltBitWidth) - return {}; - - static const int16_t Sub_32[] = { - AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, - AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, - AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, - AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, - AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19, - AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23, - AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27, - AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31 - }; - - 
static const int16_t Sub_64[] = { - AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, - AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, - AMDGPU::sub8_sub9, AMDGPU::sub10_sub11, - AMDGPU::sub12_sub13, AMDGPU::sub14_sub15, - AMDGPU::sub16_sub17, AMDGPU::sub18_sub19, - AMDGPU::sub20_sub21, AMDGPU::sub22_sub23, - AMDGPU::sub24_sub25, AMDGPU::sub26_sub27, - AMDGPU::sub28_sub29, AMDGPU::sub30_sub31 - }; - - static const int16_t Sub_128[] = { - AMDGPU::sub0_sub1_sub2_sub3, - AMDGPU::sub4_sub5_sub6_sub7, - AMDGPU::sub8_sub9_sub10_sub11, - AMDGPU::sub12_sub13_sub14_sub15, - AMDGPU::sub16_sub17_sub18_sub19, - AMDGPU::sub20_sub21_sub22_sub23, - AMDGPU::sub24_sub25_sub26_sub27, - AMDGPU::sub28_sub29_sub30_sub31 - }; - - static const int16_t Sub_256[] = { - AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, - AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, - AMDGPU::sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23, - AMDGPU::sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 - }; + const unsigned RegDWORDs = RegBitWidth / 32; + const unsigned EltDWORDs = EltSize / 4; + assert(RegSplitParts.size() + 1 >= EltDWORDs); - static const int16_t Sub_512[] = { - AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, - AMDGPU::sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 - }; - - static const int16_t *const Subs[] = { - Sub_32, Sub_64, Sub_128, Sub_256, Sub_512 - }; + const std::vector &Parts = RegSplitParts[EltDWORDs - 1]; + const unsigned NumParts = RegDWORDs / EltDWORDs; - return makeArrayRef(Subs[LogEltBitWidth - 5], RegBitWidth >> LogEltBitWidth); + return makeArrayRef(Parts.data(), NumParts); } const TargetRegisterClass* diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index 0e8feab5f16102..5a2c5b74f25785 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -33,6 +33,13 @@ class SIRegisterInfo final : public 
AMDGPUGenRegisterInfo { bool isWave32; BitVector RegPressureIgnoredUnits; + /// Sub reg indexes for getRegSplitParts. + /// First index represents subreg size from 1 to 16 DWORDs. + /// The inner vector is sorted by bit offset. + /// Provided a register can be fully split with given subregs, + /// all elements of the inner vector combined give a full lane mask. + static std::array, 16> RegSplitParts; + void reserveRegisterTuples(BitVector &, MCRegister Reg) const; public: diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index e9dbe93fa3cee1..4f9aaa1bc604ff 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -807,8 +807,10 @@ def S_SETREG_B32 : SOPK_Pseudo < "s_setreg_b32", (outs), (ins SReg_32:$sdst, hwreg:$simm16), "$simm16, $sdst", - [(AMDGPUsetreg i32:$sdst, (i16 timm:$simm16))] ->; + [(AMDGPUsetreg i32:$sdst, (i16 timm:$simm16))]> { + let Defs = [MODE]; + let Uses = [MODE]; +} // FIXME: Not on SI? //def S_GETREG_REGRD_B32 : SOPK_32 , "s_getreg_regrd_b32">; @@ -819,6 +821,8 @@ def S_SETREG_IMM32_B32 : SOPK_Pseudo < "$simm16, $imm"> { let Size = 8; // Unlike every other SOPK instruction. let has_sdst = 0; + let Defs = [MODE]; + let Uses = [MODE]; } } // End hasSideEffects = 1 @@ -953,6 +957,10 @@ def S_CMP_LG_U64 : SOPC_CMP_64 <0x13, "s_cmp_lg_u64", COND_NE>; } // End SubtargetPredicate = isGFX8Plus let SubtargetPredicate = HasVGPRIndexMode in { +// Setting the GPR index mode is really writing the fields in the mode +// register. We don't want to add mode register uses to every +// instruction, and it's too complicated to deal with anyway. This is +// modeled just as a side effect. 
def S_SET_GPR_IDX_ON : SOPC <0x11, (outs), (ins SSrc_b32:$src0, GPRIdxMode:$src1), @@ -1209,13 +1217,16 @@ let SubtargetPredicate = isGFX10Plus in { } def S_WAITCNT_DEPCTR : SOPP <0x023, (ins s16imm:$simm16), "s_waitcnt_depctr $simm16">; - def S_ROUND_MODE : - SOPP<0x024, (ins s16imm:$simm16), "s_round_mode $simm16">; - def S_DENORM_MODE : - SOPP<0x025, (ins i32imm:$simm16), "s_denorm_mode $simm16", - [(SIdenorm_mode (i32 timm:$simm16))]> { - let hasSideEffects = 1; - } + + let hasSideEffects = 1, Uses = [MODE], Defs = [MODE] in { + // FIXME: Should remove hasSideEffects + def S_ROUND_MODE : + SOPP<0x024, (ins s16imm:$simm16), "s_round_mode $simm16">; + def S_DENORM_MODE : + SOPP<0x025, (ins i32imm:$simm16), "s_denorm_mode $simm16", + [(SIdenorm_mode (i32 timm:$simm16))]>; + } + def S_TTRACEDATA_IMM : SOPP<0x028, (ins s16imm:$simm16), "s_ttracedata_imm $simm16">; } // End SubtargetPredicate = isGFX10Plus diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 0c2b5fbf59fccd..e46d84d513cc11 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -48,9 +48,14 @@ class VOP1_Pseudo pattern=[], bit VOP1On let mayStore = 0; let hasSideEffects = 0; + let ReadsModeReg = !or(isFloatType.ret, isFloatType.ret); + + // FIXME + // let mayRaiseFPException = ReadsModeReg; + let VOP1 = 1; let VALU = 1; - let Uses = [EXEC]; + let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]); let AsmVariantName = AMDGPUAsmVariants.Default; } @@ -186,31 +191,51 @@ def V_READFIRSTLANE_B32 : let SchedRW = [WriteDoubleCvt] in { defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>; + +let mayRaiseFPException = 0 in { defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>; +} + defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>; defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>; defm V_CVT_U32_F64 : VOP1Inst 
<"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>; + +let mayRaiseFPException = 0 in { defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>; +} + } // End SchedRW = [WriteDoubleCvt] let SchedRW = [WriteFloatCvt] in { + +// XXX: Does this really not raise exceptions? The manual claims the +// 16-bit ones can. +let mayRaiseFPException = 0 in { defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>; defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>; +} + defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>; defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>; let FPDPRounding = 1 in { defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>; } // End FPDPRounding = 1 + defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>; + +let ReadsModeReg = 0, mayRaiseFPException = 0 in { defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>; defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>; defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>; +} // End ReadsModeReg = 0, mayRaiseFPException = 0 } // End SchedRW = [WriteFloatCvt] +let ReadsModeReg = 0, mayRaiseFPException = 0 in { defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>; defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>; defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>; defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>; +} // ReadsModeReg = 0, mayRaiseFPException = 0 defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>; defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>; @@ -417,8 +442,11 @@ let SubtargetPredicate = isGFX9Plus in { } defm V_SAT_PK_U8_I16 : VOP1Inst<"v_sat_pk_u8_i16", VOP_I32_I32>; - defm V_CVT_NORM_I16_F16 : 
VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16>; - defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16>; + + let mayRaiseFPException = 0 in { + defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16>; + defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16>; + } // End mayRaiseFPException = 0 } // End SubtargetPredicate = isGFX9Plus let SubtargetPredicate = isGFX9Only in { diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index c1ce1b755322d8..86dc179f94214e 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -69,9 +69,14 @@ class VOP2_Pseudo pattern=[], string suf let mayStore = 0; let hasSideEffects = 0; + let ReadsModeReg = !or(isFloatType.ret, isFloatType.ret); + + // FIXME: Set this + // let mayRaiseFPException = ReadsModeReg; + let VOP2 = 1; let VALU = 1; - let Uses = [EXEC]; + let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]); let AsmVariantName = AMDGPUAsmVariants.Default; } @@ -529,8 +534,12 @@ defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_NO_EXT, int_amdgcn_mbcnt_hi>; defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_NO_EXT, AMDGPUldexp>; defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT>; // TODO: set "Uses = dst" + +let ReadsModeReg = 0, mayRaiseFPException = 0 in { defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT, AMDGPUpknorm_i16_f32>; defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT, AMDGPUpknorm_u16_f32>; +} + defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT, AMDGPUpkrtz_f16_f32>; defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT, AMDGPUpk_u16_u32>; defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT, AMDGPUpk_i16_i32>; @@ -1253,9 +1262,9 @@ defm V_SUBBREV_U32 : VOP2be_Real_gfx6_gfx7<0x02a>; defm V_READLANE_B32 : VOP2Only_Real_gfx6_gfx7<0x001>; -let InOperandList = (ins 
SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in) in { +let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in { defm V_WRITELANE_B32 : VOP2Only_Real_gfx6_gfx7<0x002>; -} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in) +} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) let SubtargetPredicate = isGFX6GFX7 in { defm : VOP2eInstAliases; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 249eb69ba4c912..66a4e62a3be451 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -290,8 +290,11 @@ class VOP3_INTERP16 ArgVT> : VOPProfile { let isCommutable = 1 in { +let mayRaiseFPException = 0 in { def V_MAD_LEGACY_F32 : VOP3Inst <"v_mad_legacy_f32", VOP3_Profile>; def V_MAD_F32 : VOP3Inst <"v_mad_f32", VOP3_Profile, fmad>; +} + def V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile>; def V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile>; def V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile, fma>; @@ -314,7 +317,7 @@ def V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", VOP3_Profile>; def V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", VOP3_Profile, mulhs>; } // End SchedRW = [WriteQuarterRate32] -let Uses = [VCC, EXEC] in { +let Uses = [MODE, VCC, EXEC] in { // v_div_fmas_f32: // result = src0 * src1 + src2 // if (vcc) @@ -336,15 +339,20 @@ def V_DIV_FMAS_F64 : VOP3_Pseudo <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC, []> } // End isCommutable = 1 +let mayRaiseFPException = 0 in { def V_CUBEID_F32 : VOP3Inst <"v_cubeid_f32", VOP3_Profile, int_amdgcn_cubeid>; def V_CUBESC_F32 : VOP3Inst <"v_cubesc_f32", VOP3_Profile, int_amdgcn_cubesc>; def V_CUBETC_F32 : VOP3Inst <"v_cubetc_f32", VOP3_Profile, int_amdgcn_cubetc>; def V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile, int_amdgcn_cubema>; +} // End mayRaiseFPException + def V_BFE_U32 : VOP3Inst <"v_bfe_u32", 
VOP3_Profile, AMDGPUbfe_u32>; def V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile, AMDGPUbfe_i32>; def V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile, AMDGPUbfi>; def V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile, fshr>; def V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile, int_amdgcn_alignbyte>; + +let mayRaiseFPException = 0 in { // XXX - Seems suspect but manual doesn't say it does def V_MIN3_F32 : VOP3Inst <"v_min3_f32", VOP3_Profile, AMDGPUfmin3>; def V_MIN3_I32 : VOP3Inst <"v_min3_i32", VOP3_Profile, AMDGPUsmin3>; def V_MIN3_U32 : VOP3Inst <"v_min3_u32", VOP3_Profile, AMDGPUumin3>; @@ -354,6 +362,8 @@ def V_MAX3_U32 : VOP3Inst <"v_max3_u32", VOP3_Profile, AMDG def V_MED3_F32 : VOP3Inst <"v_med3_f32", VOP3_Profile, AMDGPUfmed3>; def V_MED3_I32 : VOP3Inst <"v_med3_i32", VOP3_Profile, AMDGPUsmed3>; def V_MED3_U32 : VOP3Inst <"v_med3_u32", VOP3_Profile, AMDGPUumed3>; +} // End mayRaiseFPException = 0 + def V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile>; def V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile>; def V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile>; @@ -366,6 +376,8 @@ def V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile, AMDGPUldexp, 1>; } // End SchedRW = [WriteDoubleAdd], FPDPRounding = 1 + +let mayRaiseFPException = 0 in { // Seems suspicious but manual doesn't say it does. 
def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> { let SchedRW = [WriteFloatFMA, WriteSALU]; let AsmMatchConverter = ""; @@ -377,6 +389,7 @@ def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, let AsmMatchConverter = ""; let FPDPRounding = 1; } +} // End mayRaiseFPException = 0 def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile>; @@ -471,7 +484,7 @@ def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile>; let FPDPRounding = 1 in { def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile, fmad>; -let Uses = [M0, EXEC] in { +let Uses = [MODE, M0, EXEC] in { // For some reason the intrinsic operands are in a different order // from the instruction operands. def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>, @@ -482,7 +495,7 @@ def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i3 (i32 timm:$attr), (i1 timm:$high), M0))]>; -} // End Uses = [M0, EXEC] +} // End Uses = [M0, MODE, EXEC] } // End FPDPRounding = 1 } // End renamedInGFX9 = 1 @@ -498,7 +511,7 @@ def V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile>; } // End SubtargetPredicate = isGFX9Plus -let Uses = [M0, EXEC], FPDPRounding = 1 in { +let Uses = [MODE, M0, EXEC], FPDPRounding = 1 in { def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>, [(set f32:$vdst, (int_amdgcn_interp_p1_f16 (VOP3Mods f32:$src0, i32:$src0_modifiers), (i32 timm:$attrchan), @@ -512,15 +525,15 @@ def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32 def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>>; -} // End Uses = [M0, EXEC], FPDPRounding = 1 +} // End Uses = [MODE, M0, EXEC], FPDPRounding = 1 } // End SubtargetPredicate = Has16BitInsts, isCommutable = 1 -let SubtargetPredicate = isGFX8Plus, Uses = [M0, EXEC] in { +let SubtargetPredicate = isGFX8Plus, Uses = [MODE, M0, EXEC] in { def 
V_INTERP_P1_F32_e64 : VOP3Interp <"v_interp_p1_f32", VOP3_INTERP>; def V_INTERP_P2_F32_e64 : VOP3Interp <"v_interp_p2_f32", VOP3_INTERP>; def V_INTERP_MOV_F32_e64 : VOP3Interp <"v_interp_mov_f32", VOP3_INTERP_MOV>; -} // End SubtargetPredicate = isGFX8Plus, Uses = [M0, EXEC] +} // End SubtargetPredicate = isGFX8Plus, Uses = [MODE, M0, EXEC] let Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9] in { @@ -826,9 +839,9 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { defm V_READLANE_B32 : VOP3_Real_gfx10<0x360>; -let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in) in { +let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in { defm V_WRITELANE_B32 : VOP3_Real_gfx10<0x361>; -} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in) +} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) defm V_XOR3_B32 : VOP3_Real_gfx10<0x178>; defm V_LSHLREV_B64 : VOP3_Real_gfx10<0x2ff>; diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 25075e179847c4..a3d973fc2f182b 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -149,10 +149,11 @@ multiclass MadFmaMixPats>; let FPDPRounding = 1 in { @@ -370,7 +371,8 @@ def V_ACCVGPR_WRITE_B32 : VOP3Inst<"v_accvgpr_write_b32", VOPProfileAccWrite> { let isMoveImm = 1; } -let isConvergent = 1 in { +// FP32 denorm mode is respected, rounding mode is not. Exceptions are not supported. 
+let isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1 in { def V_MFMA_F32_4X4X1F32 : VOP3Inst<"v_mfma_f32_4x4x1f32", VOPProfileMAI_F32_F32_X4, int_amdgcn_mfma_f32_4x4x1f32>; def V_MFMA_F32_4X4X4F16 : VOP3Inst<"v_mfma_f32_4x4x4f16", VOPProfileMAI_F32_V4F16_X4, int_amdgcn_mfma_f32_4x4x4f16>; def V_MFMA_I32_4X4X4I8 : VOP3Inst<"v_mfma_i32_4x4x4i8", VOPProfileMAI_I32_I32_X4, int_amdgcn_mfma_i32_4x4x4i8>; @@ -391,7 +393,7 @@ def V_MFMA_I32_32X32X4I8 : VOP3Inst<"v_mfma_i32_32x32x4i8", VOPProfileMAI_I3 def V_MFMA_I32_32X32X8I8 : VOP3Inst<"v_mfma_i32_32x32x8i8", VOPProfileMAI_I32_I32_X16, int_amdgcn_mfma_i32_32x32x8i8>; def V_MFMA_F32_32X32X2BF16 : VOP3Inst<"v_mfma_f32_32x32x2bf16", VOPProfileMAI_F32_V2I16_X32, int_amdgcn_mfma_f32_32x32x2bf16>; def V_MFMA_F32_32X32X4BF16 : VOP3Inst<"v_mfma_f32_32x32x4bf16", VOPProfileMAI_F32_V2I16_X16, int_amdgcn_mfma_f32_32x32x4bf16>; -} // End isConvergent = 1 +} // End isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1 } // End SubtargetPredicate = HasMAIInsts diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td index 003a4f73c1568d..aa2fa260e7b52a 100644 --- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td @@ -92,9 +92,11 @@ class VOPC_Pseudo pattern=[], let mayStore = 0; let hasSideEffects = 0; + let ReadsModeReg = isFloatType.ret; + let VALU = 1; let VOPC = 1; - let Uses = [EXEC]; + let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]); let Defs = !if(DefVcc, [VCC], []); VOPProfile Pfl = P; @@ -738,6 +740,9 @@ multiclass VOPC_CLASS_F64 : multiclass VOPCX_CLASS_F64 : VOPCX_Class_Pseudos ; +// cmp_class ignores the FP mode and faithfully reports the unmodified +// source value. 
+let ReadsModeReg = 0, mayRaiseFPException = 0 in { defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <"v_cmp_class_f32">; defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <"v_cmpx_class_f32">; defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <"v_cmp_class_f64">; @@ -747,6 +752,7 @@ let SubtargetPredicate = Has16BitInsts in { defm V_CMP_CLASS_F16 : VOPC_CLASS_F16 <"v_cmp_class_f16">; defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">; } +} // End ReadsModeReg = 0, mayRaiseFPException = 0 //===----------------------------------------------------------------------===// // V_ICMPIntrinsic Pattern. diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index 42a275c6c36512..d52ad7f92997cc 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -8,6 +8,8 @@ // dummies for outer let class LetDummies { + bit ReadsModeReg; + bit mayRaiseFPException; bit isCommutable; bit isConvertibleToThreeAddress; bit isMoveImm; @@ -35,7 +37,7 @@ class VOPAnyCommon pattern> : let hasSideEffects = 0; let UseNamedOperandTable = 1; let VALU = 1; - let Uses = [EXEC]; + let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]); } class VOP_Pseudo pattern = [], let ClampLo = P.HasClampLo; let ClampHi = P.HasClampHi; - let Uses = [EXEC]; + let ReadsModeReg = !or(isFloatType.ret, isFloatType.ret); + + // FIXME: Set this. Right now it seems regular IR operations don't + // automatically imply no FP exceptions. + // let mayRaiseFPException = ReadsModeReg; + let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]); let AsmVariantName = AMDGPUAsmVariants.VOP3; let AsmMatchConverter = @@ -490,7 +497,13 @@ class VOP_SDWA_Pseudo pattern=[]> : let VALU = 1; let SDWA = 1; - let Uses = [EXEC]; + + let ReadsModeReg = !or(isFloatType.ret, isFloatType.ret); + + // FIXME: Set this. Right now it seems regular IR operations don't + // automatically imply no FP exceptions. 
+ // let mayRaiseFPException = ReadsModeReg; + let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]); let SubtargetPredicate = HasSDWA; let AssemblerPredicate = HasSDWA; @@ -607,7 +620,13 @@ class VOP_DPP_Pseudo pattern=[]> : let VALU = 1; let DPP = 1; let Size = 8; - let Uses = [EXEC]; + + let ReadsModeReg = !or(isFloatType.ret, isFloatType.ret); + + // FIXME: Set this. Right now it seems regular IR operations don't + // automatically imply no FP exceptions. + // let mayRaiseFPException = ReadsModeReg; + let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]); let isConvergent = 1; string Mnemonic = OpName; diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 2aef2e8610a372..49056d78302893 100644 --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -2735,6 +2735,24 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MI.eraseFromParent(); return true; } + case ARM::LOADDUAL: + case ARM::STOREDUAL: { + Register PairReg = MI.getOperand(0).getReg(); + + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(Opcode == ARM::LOADDUAL ? ARM::LDRD : ARM::STRD)) + .addReg(TRI->getSubReg(PairReg, ARM::gsub_0), + Opcode == ARM::LOADDUAL ? RegState::Define : 0) + .addReg(TRI->getSubReg(PairReg, ARM::gsub_1), + Opcode == ARM::LOADDUAL ? 
RegState::Define : 0); + for (unsigned i = 1; i < MI.getNumOperands(); i++) + MIB.add(MI.getOperand(i)); + MIB.add(predOps(ARMCC::AL)); + MIB.cloneMemRefs(MI); + MI.eraseFromParent(); + return true; + } } } diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index b49152e3167210..3c6f446580bbe9 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -145,6 +145,8 @@ class ARMDAGToDAGISel : public SelectionDAGISel { // Thumb 2 Addressing Modes: bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); + template + bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm); bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm); bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, @@ -1312,6 +1314,33 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, return true; } +template +bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base, + SDValue &OffImm) { + if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { + int RHSC; + if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) { + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); + } + + if (N.getOpcode() == ISD::SUB) + RHSC = -RHSC; + OffImm = + CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); + return true; + } + } + + // Base only. + Base = N; + OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); + return true; +} + bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm) { // Match simple R - imm8 operands. @@ -3655,6 +3684,59 @@ void ARMDAGToDAGISel::Select(SDNode *N) { CurDAG->RemoveDeadNode(N); return; } + case ARMISD::LDRD: { + if (Subtarget->isThumb2()) + break; // TableGen handles isel in this case. 
+ SDValue Base, RegOffset, ImmOffset; + const SDValue &Chain = N->getOperand(0); + const SDValue &Addr = N->getOperand(1); + SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); + if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { + // The register-offset variant of LDRD mandates that the register + // allocated to RegOffset is not reused in any of the remaining operands. + // This restriction is currently not enforced. Therefore emitting this + // variant is explicitly avoided. + Base = Addr; + RegOffset = CurDAG->getRegister(0, MVT::i32); + } + SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain}; + SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl, + {MVT::Untyped, MVT::Other}, Ops); + SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, + SDValue(New, 0)); + SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, + SDValue(New, 0)); + transferMemOperands(N, New); + ReplaceUses(SDValue(N, 0), Lo); + ReplaceUses(SDValue(N, 1), Hi); + ReplaceUses(SDValue(N, 2), SDValue(New, 1)); + CurDAG->RemoveDeadNode(N); + return; + } + case ARMISD::STRD: { + if (Subtarget->isThumb2()) + break; // TableGen handles isel in this case. + SDValue Base, RegOffset, ImmOffset; + const SDValue &Chain = N->getOperand(0); + const SDValue &Addr = N->getOperand(3); + SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); + if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { + // The register-offset variant of STRD mandates that the register + // allocated to RegOffset is not reused in any of the remaining operands. + // This restriction is currently not enforced. Therefore emitting this + // variant is explicitly avoided. 
+ Base = Addr; + RegOffset = CurDAG->getRegister(0, MVT::i32); + } + SDNode *RegPair = + createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2)); + SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain}; + SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops); + transferMemOperands(N, New); + ReplaceUses(SDValue(N, 0), SDValue(New, 0)); + CurDAG->RemoveDeadNode(N); + return; + } case ARMISD::LOOP_DEC: { SDValue Ops[] = { N->getOperand(1), N->getOperand(2), diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index c5c99610dd3ab4..98161c34944459 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1082,6 +1082,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SRA, MVT::i64, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); + setOperationAction(ISD::LOAD, MVT::i64, Custom); + setOperationAction(ISD::STORE, MVT::i64, Custom); // MVE lowers 64 bit shifts to lsll and lsrl // assuming that ISD::SRL and SRA of i64 are already marked custom @@ -1624,6 +1626,9 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; + case ARMISD::LDRD: return "ARMISD::LDRD"; + case ARMISD::STRD: return "ARMISD::STRD"; + case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK"; case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK"; @@ -9151,6 +9156,25 @@ static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) { return DAG.getMergeValues({Pred, Load.getValue(1)}, dl); } +void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl &Results, + SelectionDAG &DAG) const { + LoadSDNode *LD = cast(N); + EVT MemVT = LD->getMemoryVT(); + assert(LD->isUnindexed() && "Loads should be unindexed at this point."); + + if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() 
&& + !Subtarget->isThumb1Only() && LD->isVolatile()) { + SDLoc dl(N); + SDValue Result = DAG.getMemIntrinsicNode( + ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}), + {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand()); + SDValue Lo = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 0 : 1); + SDValue Hi = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 1 : 0); + SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); + Results.append({Pair, Result.getValue(2)}); + } +} + static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) { StoreSDNode *ST = cast(Op.getNode()); EVT MemVT = ST->getMemoryVT(); @@ -9180,6 +9204,38 @@ static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) { ST->getMemOperand()); } +static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + StoreSDNode *ST = cast(Op.getNode()); + EVT MemVT = ST->getMemoryVT(); + assert(ST->isUnindexed() && "Stores should be unindexed at this point."); + + if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() && + !Subtarget->isThumb1Only() && ST->isVolatile()) { + SDNode *N = Op.getNode(); + SDLoc dl(N); + + SDValue Lo = DAG.getNode( + ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(), + DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 0 : 1, dl, + MVT::i32)); + SDValue Hi = DAG.getNode( + ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(), + DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 
1 : 0, dl, + MVT::i32)); + + return DAG.getMemIntrinsicNode(ARMISD::STRD, dl, DAG.getVTList(MVT::Other), + {ST->getChain(), Lo, Hi, ST->getBasePtr()}, + MemVT, ST->getMemOperand()); + } else if (Subtarget->hasMVEIntegerOps() && + ((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || + MemVT == MVT::v16i1))) { + return LowerPredicateStore(Op, DAG); + } + + return SDValue(); +} + static bool isZeroVector(SDValue N) { return (ISD::isBuildVectorAllZeros(N.getNode()) || (N->getOpcode() == ARMISD::VMOVIMM && @@ -9414,7 +9470,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::LOAD: return LowerPredicateLoad(Op, DAG); case ISD::STORE: - return LowerPredicateStore(Op, DAG); + return LowerSTORE(Op, DAG, Subtarget); case ISD::MLOAD: return LowerMLOAD(Op, DAG); case ISD::ATOMIC_LOAD: @@ -9518,7 +9574,9 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::ABS: lowerABS(N, Results, DAG); return ; - + case ISD::LOAD: + LowerLOAD(N, Results, DAG); + break; } if (Res.getNode()) Results.push_back(Res); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 8d26b39b421002..4323f00f8dbcea 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -305,7 +305,11 @@ class VectorType; VST4_UPD, VST2LN_UPD, VST3LN_UPD, - VST4LN_UPD + VST4LN_UPD, + + // Load/Store of dual registers + LDRD, + STRD }; } // end namespace ARMISD @@ -771,6 +775,8 @@ class VectorType; SDValue LowerFSETCC(SDValue Op, SelectionDAG &DAG) const; void lowerABS(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const; + void LowerLOAD(SDNode *N, SmallVectorImpl &Results, + SelectionDAG &DAG) const; Register getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const override; diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index bb701b03991d91..6b990a59ed0e91 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ 
b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -245,6 +245,12 @@ def ARMqsub8b : SDNode<"ARMISD::QSUB8b", SDT_ARMAnd, []>; def ARMqadd16b : SDNode<"ARMISD::QADD16b", SDT_ARMAnd, []>; def ARMqsub16b : SDNode<"ARMISD::QSUB16b", SDT_ARMAnd, []>; +def SDT_ARMldrd : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; +def ARMldrd : SDNode<"ARMISD::LDRD", SDT_ARMldrd, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +def SDT_ARMstrd : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; +def ARMstrd : SDNode<"ARMISD::STRD", SDT_ARMstrd, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + // Vector operations shared between NEON and MVE def ARMvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; @@ -2736,6 +2742,14 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { Requires<[IsARM, HasV5TE]>; } +let mayLoad = 1, hasSideEffects = 0, hasNoSchedulingInfo = 1 in { +def LOADDUAL : ARMPseudoInst<(outs GPRPairOp:$Rt), (ins addrmode3:$addr), + 64, IIC_iLoad_d_r, []>, + Requires<[IsARM, HasV5TE]> { + let AM = AddrMode3; +} +} + def LDA : AIldracq<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), NoItinerary, "lda", "\t$Rt, $addr", []>; def LDAB : AIldracq<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), @@ -3014,6 +3028,14 @@ let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { } } +let mayStore = 1, hasSideEffects = 0, hasNoSchedulingInfo = 1 in { +def STOREDUAL : ARMPseudoInst<(outs), (ins GPRPairOp:$Rt, addrmode3:$addr), + 64, IIC_iStore_d_r, []>, + Requires<[IsARM, HasV5TE]> { + let AM = AddrMode3; +} +} + // Indexed stores multiclass AI2_stridx { diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index b4f3901a4603f5..e2235b1c25013f 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -270,7 +270,8 @@ def t2am_imm8_offset : MemOperand, // t2addrmode_imm8s4 := reg +/- (imm8 << 2) def 
MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";} -class T2AddrMode_Imm8s4 : MemOperand { +class T2AddrMode_Imm8s4 : MemOperand, + ComplexPattern", []> { let EncoderMethod = "getT2AddrModeImm8s4OpValue"; let DecoderMethod = "DecodeT2AddrModeImm8s4"; let ParserMatchClass = MemImm8s4OffsetAsmOperand; @@ -1448,7 +1449,8 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { // Load doubleword def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2), (ins t2addrmode_imm8s4:$addr), - IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>, + IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", + [(set rGPR:$Rt, rGPR:$Rt2, (ARMldrd t2addrmode_imm8s4:$addr))]>, Sched<[WriteLd]>; } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 @@ -1629,7 +1631,8 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si, let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr), - IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>, + IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", + [(ARMstrd rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr)]>, Sched<[WriteST]>; // Indexed stores diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp index cc8a4867753805..a02556a399098d 100644 --- a/llvm/lib/Target/BPF/BPFISelLowering.cpp +++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp @@ -604,6 +604,12 @@ BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB, DebugLoc DL = MI.getDebugLoc(); MachineRegisterInfo &RegInfo = F->getRegInfo(); + + if (!isSigned) { + Register PromotedReg0 = RegInfo.createVirtualRegister(RC); + BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg); + return PromotedReg0; + } Register PromotedReg0 = RegInfo.createVirtualRegister(RC); Register PromotedReg1 = RegInfo.createVirtualRegister(RC); Register PromotedReg2 = 
RegInfo.createVirtualRegister(RC); diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td index 6781d09b846e7a..4298e2eaec0462 100644 --- a/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -732,8 +732,7 @@ let isCodeGenOnly = 1 in { def : Pat<(i64 (sext GPR32:$src)), (SRA_ri (SLL_ri (MOV_32_64 GPR32:$src), 32), 32)>; -def : Pat<(i64 (zext GPR32:$src)), - (SRL_ri (SLL_ri (MOV_32_64 GPR32:$src), 32), 32)>; +def : Pat<(i64 (zext GPR32:$src)), (MOV_32_64 GPR32:$src)>; // For i64 -> i32 truncation, use the 32-bit subregister directly. def : Pat<(i32 (trunc GPR:$src)), diff --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp index a2ceade66800c5..fe955fad042497 100644 --- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp +++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp @@ -301,19 +301,16 @@ bool BPFMIPreEmitPeephole::eliminateRedundantMov(void) { // // MOV rA, rA // - // This is particularly possible to happen when sub-register support - // enabled. The special type cast insn MOV_32_64 involves different - // register class on src (i32) and dst (i64), RA could generate useless - // instruction due to this. + // Note that we cannot remove + // MOV_32_64 rA, wA + // MOV_rr_32 wA, wA + // as these two instructions having side effects, zeroing out + // top 32 bits of rA. 
unsigned Opcode = MI.getOpcode(); - if (Opcode == BPF::MOV_32_64 || - Opcode == BPF::MOV_rr || Opcode == BPF::MOV_rr_32) { + if (Opcode == BPF::MOV_rr) { Register dst = MI.getOperand(0).getReg(); Register src = MI.getOperand(1).getReg(); - if (Opcode == BPF::MOV_32_64) - dst = TRI->getSubReg(dst, BPF::sub_32); - if (dst != src) continue; diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index 1d1f11e498c207..a6c7868f6ac250 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -51,6 +51,7 @@ def DirectivePwr6x def DirectivePwr7: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR7", "">; def DirectivePwr8: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR8", "">; def DirectivePwr9: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR9", "">; +def DirectivePwr10: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR10", "">; def DirectivePwrFuture : SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR_FUTURE", "">; @@ -205,6 +206,9 @@ def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true", def FeatureISA3_0 : SubtargetFeature<"isa-v30-instructions", "IsISA3_0", "true", "Enable instructions added in ISA 3.0.">; +def FeatureISA3_1 : SubtargetFeature<"isa-v31-instructions", "IsISA3_1", + "true", + "Enable instructions added in ISA 3.1.">; def FeatureP9Altivec : SubtargetFeature<"power9-altivec", "HasP9Altivec", "true", "Enable POWER9 Altivec instructions", [FeatureISA3_0, FeatureP8Altivec]>; @@ -328,14 +332,25 @@ def ProcessorFeatures { list P9Features = !listconcat(P9InheritableFeatures, P9SpecificFeatures); + // Power10 + // For P10 CPU we assume that all of the existing features from Power9 + // still exist with the exception of those we know are Power9 specific. 
+ list P10AdditionalFeatures = + [DirectivePwr10, FeatureISA3_1, FeaturePrefixInstrs, + FeaturePCRelativeMemops]; + list P10SpecificFeatures = []; + list P10InheritableFeatures = + !listconcat(P9InheritableFeatures, P10AdditionalFeatures); + list P10Features = + !listconcat(P10InheritableFeatures, P10SpecificFeatures); + // Future - // For future CPU we assume that all of the existing features from Power 9 - // still exist with the exception of those we know are Power 9 specific. + // For future CPU we assume that all of the existing features from Power10 + // still exist with the exception of those we know are Power10 specific. list FutureAdditionalFeatures = []; - list FutureSpecificFeatures = - [FeaturePrefixInstrs, FeaturePCRelativeMemops]; + list FutureSpecificFeatures = []; list FutureInheritableFeatures = - !listconcat(P9InheritableFeatures, FutureAdditionalFeatures); + !listconcat(P10InheritableFeatures, FutureAdditionalFeatures); list FutureFeatures = !listconcat(FutureInheritableFeatures, FutureSpecificFeatures); } @@ -540,6 +555,8 @@ def : ProcessorModel<"pwr6x", G5Model, def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.P7Features>; def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.P8Features>; def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.P9Features>; +// No scheduler model yet. +def : ProcessorModel<"pwr10", NoSchedModel, ProcessorFeatures.P10Features>; // No scheduler model for future CPU. 
def : ProcessorModel<"future", NoSchedModel, ProcessorFeatures.FutureFeatures>; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 42df83831113ad..53f9ac678c7b7c 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1306,6 +1306,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, case PPC::DIR_PWR7: case PPC::DIR_PWR8: case PPC::DIR_PWR9: + case PPC::DIR_PWR10: case PPC::DIR_PWR_FUTURE: setPrefLoopAlignment(Align(16)); setPrefFunctionAlignment(Align(16)); @@ -14913,6 +14914,7 @@ Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { case PPC::DIR_PWR7: case PPC::DIR_PWR8: case PPC::DIR_PWR9: + case PPC::DIR_PWR10: case PPC::DIR_PWR_FUTURE: { if (!ML) break; @@ -16103,6 +16105,7 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const { // vector 7 2 2 return true; case PPC::DIR_PWR9: + case PPC::DIR_PWR10: case PPC::DIR_PWR_FUTURE: // type mul add shl // scalar 5 2 2 diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index cfc54df13f7924..2f332715d8cac0 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -115,6 +115,7 @@ void PPCSubtarget::initializeEnvironment() { HasAddiLoadFusion = false; HasAddisLoadFusion = false; IsISA3_0 = false; + IsISA3_1 = false; UseLongCalls = false; SecurePlt = false; VectorsUseTwoUnits = false; diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h index be1143f903e8b1..bfe39814e4cc8f 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -34,32 +34,33 @@ class StringRef; namespace PPC { // -m directive values. 
- enum { - DIR_NONE, - DIR_32, - DIR_440, - DIR_601, - DIR_602, - DIR_603, - DIR_7400, - DIR_750, - DIR_970, - DIR_A2, - DIR_E500, - DIR_E500mc, - DIR_E5500, - DIR_PWR3, - DIR_PWR4, - DIR_PWR5, - DIR_PWR5X, - DIR_PWR6, - DIR_PWR6X, - DIR_PWR7, - DIR_PWR8, - DIR_PWR9, - DIR_PWR_FUTURE, - DIR_64 - }; +enum { + DIR_NONE, + DIR_32, + DIR_440, + DIR_601, + DIR_602, + DIR_603, + DIR_7400, + DIR_750, + DIR_970, + DIR_A2, + DIR_E500, + DIR_E500mc, + DIR_E5500, + DIR_PWR3, + DIR_PWR4, + DIR_PWR5, + DIR_PWR5X, + DIR_PWR6, + DIR_PWR6X, + DIR_PWR7, + DIR_PWR8, + DIR_PWR9, + DIR_PWR10, + DIR_PWR_FUTURE, + DIR_64 +}; } class GlobalValue; @@ -138,6 +139,7 @@ class PPCSubtarget : public PPCGenSubtargetInfo { bool HasAddiLoadFusion; bool HasAddisLoadFusion; bool IsISA3_0; + bool IsISA3_1; bool UseLongCalls; bool SecurePlt; bool VectorsUseTwoUnits; @@ -308,6 +310,7 @@ class PPCSubtarget : public PPCGenSubtargetInfo { bool hasHTM() const { return HasHTM; } bool hasFloat128() const { return HasFloat128; } bool isISA3_0() const { return IsISA3_0; } + bool isISA3_1() const { return IsISA3_1; } bool useLongCalls() const { return UseLongCalls; } bool hasFusion() const { return HasFusion; } bool hasAddiLoadFusion() const { return HasAddiLoadFusion; } diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index a41c6b41a991b6..46c5335a558f47 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -651,11 +651,12 @@ unsigned PPCTTIImpl::getCacheLineSize() const { if (CacheLineSize.getNumOccurrences() > 0) return CacheLineSize; - // On P7, P8 or P9 we have a cache line size of 128. + // Starting with P7 we have a cache line size of 128. unsigned Directive = ST->getCPUDirective(); // Assume that Future CPU has the same cache line size as the others. 
if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 || - Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR_FUTURE) + Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 || + Directive == PPC::DIR_PWR_FUTURE) return 128; // On other processors return a default of 64 bytes. @@ -687,9 +688,11 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) { // For P7 and P8, floating-point instructions have a 6-cycle latency and // there are two execution units, so unroll by 12x for latency hiding. // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready + // FIXME: the same for P10 as previous gen until POWER10 scheduling is ready // Assume that future is the same as the others. if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 || - Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR_FUTURE) + Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 || + Directive == PPC::DIR_PWR_FUTURE) return 12; // For most things, modern systems have two execution units (and diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 91e0cdb80386a8..a5fa98ec8d926d 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -4758,17 +4758,24 @@ void X86DAGToDAGISel::Select(SDNode *Node) { unsigned Opc, MOpc; unsigned LoReg, HiReg; bool IsSigned = Opcode == ISD::SMUL_LOHI; + bool UseMULX = !IsSigned && Subtarget->hasBMI2(); switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i32: - Opc = IsSigned ? X86::IMUL32r : X86::MUL32r; - MOpc = IsSigned ? X86::IMUL32m : X86::MUL32m; - LoReg = X86::EAX; HiReg = X86::EDX; + Opc = UseMULX ? X86::MULX32rr : + IsSigned ? X86::IMUL32r : X86::MUL32r; + MOpc = UseMULX ? X86::MULX32rm : + IsSigned ? X86::IMUL32m : X86::MUL32m; + LoReg = UseMULX ? X86::EDX : X86::EAX; + HiReg = X86::EDX; break; case MVT::i64: - Opc = IsSigned ? X86::IMUL64r : X86::MUL64r; - MOpc = IsSigned ? 
X86::IMUL64m : X86::MUL64m; - LoReg = X86::RAX; HiReg = X86::RDX; + Opc = UseMULX ? X86::MULX64rr : + IsSigned ? X86::IMUL64r : X86::MUL64r; + MOpc = UseMULX ? X86::MULX64rm : + IsSigned ? X86::IMUL64m : X86::MUL64m; + LoReg = UseMULX ? X86::RDX : X86::RAX; + HiReg = X86::RDX; break; } @@ -4783,15 +4790,24 @@ void X86DAGToDAGISel::Select(SDNode *Node) { SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, N0, SDValue()).getValue(1); + SDValue ResHi, ResLo; if (foldedLoad) { SDValue Chain; MachineSDNode *CNode = nullptr; SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), InFlag }; - SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); - CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); - Chain = SDValue(CNode, 0); - InFlag = SDValue(CNode, 1); + if (UseMULX) { + SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other); + CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); + ResHi = SDValue(CNode, 0); + ResLo = SDValue(CNode, 1); + Chain = SDValue(CNode, 2); + } else { + SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); + CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); + Chain = SDValue(CNode, 0); + InFlag = SDValue(CNode, 1); + } // Update the chain. ReplaceUses(N1.getValue(1), Chain); @@ -4799,27 +4815,38 @@ void X86DAGToDAGISel::Select(SDNode *Node) { CurDAG->setNodeMemRefs(CNode, {cast(N1)->getMemOperand()}); } else { SDValue Ops[] = { N1, InFlag }; - SDVTList VTs = CurDAG->getVTList(MVT::Glue); - SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); - InFlag = SDValue(CNode, 0); + if (UseMULX) { + SDVTList VTs = CurDAG->getVTList(NVT, NVT); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); + ResHi = SDValue(CNode, 0); + ResLo = SDValue(CNode, 1); + } else { + SDVTList VTs = CurDAG->getVTList(MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); + InFlag = SDValue(CNode, 0); + } } // Copy the low half of the result, if it is needed. 
if (!SDValue(Node, 0).use_empty()) { - assert(LoReg && "Register for low half is not defined!"); - SDValue ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, - NVT, InFlag); - InFlag = ResLo.getValue(2); + if (!ResLo) { + assert(LoReg && "Register for low half is not defined!"); + ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, + NVT, InFlag); + InFlag = ResLo.getValue(2); + } ReplaceUses(SDValue(Node, 0), ResLo); LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n'); } // Copy the high half of the result, if it is needed. if (!SDValue(Node, 1).use_empty()) { - assert(HiReg && "Register for high half is not defined!"); - SDValue ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, - NVT, InFlag); - InFlag = ResHi.getValue(2); + if (!ResHi) { + assert(HiReg && "Register for high half is not defined!"); + ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, + NVT, InFlag); + InFlag = ResHi.getValue(2); + } ReplaceUses(SDValue(Node, 1), ResHi); LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n'); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d70b5a7f3a2274..8ec958338c024c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -37363,14 +37363,20 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( // MOVMSK only uses the MSB from each vector element. KnownBits KnownSrc; - if (SimplifyDemandedBits(Src, APInt::getSignMask(SrcBits), DemandedElts, - KnownSrc, TLO, Depth + 1)) + APInt DemandedSrcBits = APInt::getSignMask(SrcBits); + if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO, + Depth + 1)) return true; if (KnownSrc.One[SrcBits - 1]) Known.One.setLowBits(NumElts); else if (KnownSrc.Zero[SrcBits - 1]) Known.Zero.setLowBits(NumElts); + + // Attempt to avoid multi-use os if we don't need anything from it. 
+ if (SDValue NewSrc = SimplifyMultipleUseDemandedBits( + Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1)) + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc)); return false; } case X86ISD::BEXTR: { diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 697341443273a4..475f6bc8e9b73e 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1848,10 +1848,11 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap); PSI = _PSI; - if (M.getProfileSummary(/* IsCS */ false) == nullptr) + if (M.getProfileSummary(/* IsCS */ false) == nullptr) { M.setProfileSummary(Reader->getSummary().getMD(M.getContext()), ProfileSummary::PSK_Sample); - + PSI->refresh(); + } // Compute the total number of samples collected in this profile. for (const auto &I : Reader->getProfiles()) TotalCollectedSamples += I.second.getTotalSamples(); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 7e20d241bbab5f..a3d5215fad4f8a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -4387,7 +4387,8 @@ static bool isSafeToEliminateVarargsCast(const CallBase &Call, // TODO: This is probably something which should be expanded to all // intrinsics since the entire point of intrinsics is that // they are understandable by the optimizer. 
- if (isStatepoint(&Call) || isGCRelocate(&Call) || isGCResult(&Call)) + if (isa(Call) || isa(Call) || + isa(Call)) return false; // The size of ByVal or InAlloca arguments is derived from the type, so we diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 72eb5cd61b0037..7579139231423e 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -1613,6 +1613,7 @@ static bool annotateAllFunctions( M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), IsCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr); + PSI->refresh(); std::unordered_multimap ComdatMembers; collectComdatMembers(M, ComdatMembers); diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index f5b24182edbdd0..6f16d6583340d1 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -616,7 +616,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl( // If V is a constant, then it is known in all predecessors. if (Constant *KC = getKnownConstant(V, Preference)) { for (BasicBlock *Pred : predecessors(BB)) - Result.push_back(std::make_pair(KC, Pred)); + Result.emplace_back(KC, Pred); return !Result.empty(); } @@ -643,7 +643,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl( // predecessor, use that information to try to thread this block. 
Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI); if (Constant *KC = getKnownConstant(PredCst, Preference)) - Result.push_back(std::make_pair(KC, P)); + Result.emplace_back(KC, P); } return !Result.empty(); @@ -654,13 +654,13 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl( for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *InVal = PN->getIncomingValue(i); if (Constant *KC = getKnownConstant(InVal, Preference)) { - Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i))); + Result.emplace_back(KC, PN->getIncomingBlock(i)); } else { Constant *CI = LVI->getConstantOnEdge(InVal, PN->getIncomingBlock(i), BB, CxtI); if (Constant *KC = getKnownConstant(CI, Preference)) - Result.push_back(std::make_pair(KC, PN->getIncomingBlock(i))); + Result.emplace_back(KC, PN->getIncomingBlock(i)); } } @@ -759,7 +759,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl( Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI); if (Constant *KC = getKnownConstant(Folded, WantInteger)) - Result.push_back(std::make_pair(KC, LHSVal.second)); + Result.emplace_back(KC, LHSVal.second); } } @@ -811,7 +811,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl( } if (Constant *KC = getKnownConstant(Res, WantInteger)) - Result.push_back(std::make_pair(KC, PredBB)); + Result.emplace_back(KC, PredBB); } return !Result.empty(); @@ -834,7 +834,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl( continue; Constant *ResC = ConstantInt::get(CmpType, Res); - Result.push_back(std::make_pair(ResC, P)); + Result.emplace_back(ResC, P); } return !Result.empty(); @@ -873,7 +873,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl( else continue; - Result.push_back(std::make_pair(ResC, P)); + Result.emplace_back(ResC, P); } return !Result.empty(); @@ -891,7 +891,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl( Constant *V = LHSVal.first; Constant *Folded = ConstantExpr::getCompare(Pred, V, 
CmpConst); if (Constant *KC = getKnownConstant(Folded, WantInteger)) - Result.push_back(std::make_pair(KC, LHSVal.second)); + Result.emplace_back(KC, LHSVal.second); } return !Result.empty(); @@ -925,7 +925,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl( // See if the select has a known constant value for this predecessor. if (Constant *Val = KnownCond ? TrueVal : FalseVal) - Result.push_back(std::make_pair(Val, C.second)); + Result.emplace_back(Val, C.second); } return !Result.empty(); @@ -936,7 +936,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl( Constant *CI = LVI->getConstant(V, BB, CxtI); if (Constant *KC = getKnownConstant(CI, Preference)) { for (BasicBlock *Pred : predecessors(BB)) - Result.push_back(std::make_pair(KC, Pred)); + Result.emplace_back(KC, Pred); } return !Result.empty(); @@ -1345,7 +1345,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) { // If so, this load is partially redundant. Remember this info so that we // can create a PHI node. - AvailablePreds.push_back(std::make_pair(PredBB, PredAvailable)); + AvailablePreds.emplace_back(PredBB, PredAvailable); } // If the loaded value isn't available in any predecessor, it isn't partially @@ -1419,7 +1419,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) { if (AATags) NewVal->setAAMetadata(AATags); - AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal)); + AvailablePreds.emplace_back(UnavailablePred, NewVal); } // Now we know that each predecessor of this block has a value in @@ -1652,7 +1652,7 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, isa(Pred->getTerminator())) continue; - PredToDestList.push_back(std::make_pair(Pred, DestBB)); + PredToDestList.emplace_back(Pred, DestBB); } // If all edges were unthreadable, we fail. 
diff --git a/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp b/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp index b39edca6780fcf..4553b23532f215 100644 --- a/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp +++ b/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp @@ -189,7 +189,8 @@ static bool needsStatepoint(CallBase *Call, const TargetLibraryInfo &TLI) { return false; } - return !(isStatepoint(Call) || isGCRelocate(Call) || isGCResult(Call)); + return !(isa(Call) || isa(Call) || + isa(Call)); } /// Returns true if this loop is known to contain a call safepoint which diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 2aace0b7f81119..ab284b75ee2c21 100644 --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -2586,7 +2586,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT, auto NeedsRewrite = [&TLI](Instruction &I) { if (const auto *Call = dyn_cast(&I)) - return !callsGCLeafFunction(Call, TLI) && !isStatepoint(Call); + return !callsGCLeafFunction(Call, TLI) && !isa(Call); return false; }; diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp index 4fd63fa1838bfc..a752e356b7273f 100644 --- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -460,6 +460,16 @@ class TailRecursionEliminator { SmallVector ArgumentPHIs; bool RemovableCallsMustBeMarkedTail = false; + // PHI node to store our return value. + PHINode *RetPN = nullptr; + + // i1 PHI node to track if we have a valid return value stored in RetPN. + PHINode *RetKnownPN = nullptr; + + // Vector of select instructions we insereted. These selects use RetKnownPN + // to either propagate RetPN or select a new return value. 
+ SmallVector RetSelects; + TailRecursionEliminator(Function &F, const TargetTransformInfo *TTI, AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) @@ -577,6 +587,21 @@ void TailRecursionEliminator::createTailRecurseLoopHeader(CallInst *CI) { PN->addIncoming(&*I, NewEntry); ArgumentPHIs.push_back(PN); } + + // If the function doen't return void, create the RetPN and RetKnownPN PHI + // nodes to track our return value. We initialize RetPN with undef and + // RetKnownPN with false since we can't know our return value at function + // entry. + Type *RetType = F.getReturnType(); + if (!RetType->isVoidTy()) { + Type *BoolType = Type::getInt1Ty(F.getContext()); + RetPN = PHINode::Create(RetType, 2, "ret.tr", InsertPos); + RetKnownPN = PHINode::Create(BoolType, 2, "ret.known.tr", InsertPos); + + RetPN->addIncoming(UndefValue::get(RetType), NewEntry); + RetKnownPN->addIncoming(ConstantInt::getFalse(BoolType), NewEntry); + } + // The entry block was changed from HeaderBB to NewEntry. // The forward DominatorTree needs to be recalculated when the EntryBB is // changed. In this corner-case we recalculate the entire tree. @@ -616,11 +641,7 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) { // value for the accumulator is placed in this variable. If this value is set // then we actually perform accumulator recursion elimination instead of // simple tail recursion elimination. If the operation is an LLVM instruction - // (eg: "add") then it is recorded in AccumulatorRecursionInstr. If not, then - // we are handling the case when the return instruction returns a constant C - // which is different to the constant returned by other return instructions - // (which is recorded in AccumulatorRecursionEliminationInitVal). This is a - // special case of accumulator recursion, the operation being "return C". + // (eg: "add") then it is recorded in AccumulatorRecursionInstr. 
Value *AccumulatorRecursionEliminationInitVal = nullptr; Instruction *AccumulatorRecursionInstr = nullptr; @@ -647,26 +668,6 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) { } } - // We can only transform call/return pairs that either ignore the return value - // of the call and return void, ignore the value of the call and return a - // constant, return the value returned by the tail call, or that are being - // accumulator recursion variable eliminated. - if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI && - !isa(Ret->getReturnValue()) && - AccumulatorRecursionEliminationInitVal == nullptr && - !getCommonReturnValue(nullptr, CI)) { - // One case remains that we are able to handle: the current return - // instruction returns a constant, and all other return instructions - // return a different constant. - if (!isDynamicConstant(Ret->getReturnValue(), CI, Ret)) - return false; // Current return instruction does not return a constant. - // Check that all other return instructions return a common constant. If - // so, record it in AccumulatorRecursionEliminationInitVal. - AccumulatorRecursionEliminationInitVal = getCommonReturnValue(Ret, CI); - if (!AccumulatorRecursionEliminationInitVal) - return false; - } - BasicBlock *BB = Ret->getParent(); using namespace ore; @@ -698,20 +699,15 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) { PHINode *AccPN = insertAccumulator(AccumulatorRecursionEliminationInitVal); Instruction *AccRecInstr = AccumulatorRecursionInstr; - if (AccRecInstr) { - // Add an incoming argument for the current block, which is computed by - // our associative and commutative accumulator instruction. - AccPN->addIncoming(AccRecInstr, BB); - - // Next, rewrite the accumulator recursion instruction so that it does not - // use the result of the call anymore, instead, use the PHI node we just - // inserted. 
- AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN); - } else { - // Add an incoming argument for the current block, which is just the - // constant returned by the current return instruction. - AccPN->addIncoming(Ret->getReturnValue(), BB); - } + + // Add an incoming argument for the current block, which is computed by + // our associative and commutative accumulator instruction. + AccPN->addIncoming(AccRecInstr, BB); + + // Next, rewrite the accumulator recursion instruction so that it does not + // use the result of the call anymore, instead, use the PHI node we just + // inserted. + AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN); // Finally, rewrite any return instructions in the program to return the PHI // node instead of the "initval" that they do currently. This loop will @@ -722,6 +718,25 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) { ++NumAccumAdded; } + // Update our return value tracking + if (RetPN) { + if (Ret->getReturnValue() == CI || AccumulatorRecursionEliminationInitVal) { + // Defer selecting a return value + RetPN->addIncoming(RetPN, BB); + RetKnownPN->addIncoming(RetKnownPN, BB); + } else { + // We found a return value we want to use, insert a select instruction to + // select it if we don't already know what our return value will be and + // store the result in our return value PHI node. + SelectInst *SI = SelectInst::Create( + RetKnownPN, RetPN, Ret->getReturnValue(), "current.ret.tr", Ret); + RetSelects.push_back(SI); + + RetPN->addIncoming(SI, BB); + RetKnownPN->addIncoming(ConstantInt::getTrue(RetKnownPN->getType()), BB); + } + } + // Now that all of the PHI nodes are in place, remove the call and // ret instructions, replacing them with an unconditional branch. 
BranchInst *NewBI = BranchInst::Create(HeaderBB, Ret); @@ -804,6 +819,30 @@ void TailRecursionEliminator::cleanupAndFinalize() { PN->eraseFromParent(); } } + + if (RetPN) { + if (RetSelects.empty()) { + // If we didn't insert any select instructions, then we know we didn't + // store a return value and we can remove the PHI nodes we inserted. + RetPN->dropAllReferences(); + RetPN->eraseFromParent(); + + RetKnownPN->dropAllReferences(); + RetKnownPN->eraseFromParent(); + } else { + // We need to insert a select instruction before any return left in the + // function to select our stored return value if we have one. + for (BasicBlock &BB : F) { + ReturnInst *RI = dyn_cast(BB.getTerminator()); + if (!RI) + continue; + + SelectInst *SI = SelectInst::Create( + RetKnownPN, RetPN, RI->getOperand(0), "current.ret.tr", RI); + RI->setOperand(0, SI); + } + } + } } bool TailRecursionEliminator::eliminate(Function &F, diff --git a/llvm/lib/Transforms/Utils/StripGCRelocates.cpp b/llvm/lib/Transforms/Utils/StripGCRelocates.cpp index 7880ea1c6c4797..b559811d120bcf 100644 --- a/llvm/lib/Transforms/Utils/StripGCRelocates.cpp +++ b/llvm/lib/Transforms/Utils/StripGCRelocates.cpp @@ -48,7 +48,7 @@ bool StripGCRelocates::runOnFunction(Function &F) { // i.e. not bound to a single statepoint token. for (Instruction &I : instructions(F)) { if (auto *GCR = dyn_cast(&I)) - if (isStatepoint(GCR->getOperand(0))) + if (isa(GCR->getOperand(0))) GCRelocates.push_back(GCR); } // All gc.relocates are bound to a single statepoint token. 
The order of diff --git a/llvm/test/Assembler/debug-info.ll b/llvm/test/Assembler/debug-info.ll index d54dba07ac1e07..419623a2cb7d14 100644 --- a/llvm/test/Assembler/debug-info.ll +++ b/llvm/test/Assembler/debug-info.ll @@ -4,10 +4,10 @@ ; CHECK: !named = !{!0, !0, !1, !2, !3, !4, !5, !6, !7, !8, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39} !named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42} -; CHECK: !0 = !DISubrange(count: 3) +; CHECK: !0 = !DISubrange(count: 3, lowerBound: 0) ; CHECK-NEXT: !1 = !DISubrange(count: 3, lowerBound: 4) ; CHECK-NEXT: !2 = !DISubrange(count: 3, lowerBound: -5) -!0 = !DISubrange(count: 3) +!0 = !DISubrange(count: 3, lowerBound: 0) !1 = !DISubrange(count: 3, lowerBound: 0) !2 = !DISubrange(count: 3, lowerBound: 4) diff --git a/llvm/test/Assembler/disubrange-empty-array.ll b/llvm/test/Assembler/disubrange-empty-array.ll index 7b5279e3d3c2ce..ef0ca0e81a2707 100644 --- a/llvm/test/Assembler/disubrange-empty-array.ll +++ b/llvm/test/Assembler/disubrange-empty-array.ll @@ -4,10 +4,10 @@ ; CHECK: !named = !{!0, !0, !1, !2} !named = !{!0, !1, !2, !3} -; CHECK: !0 = !DISubrange(count: -1) +; CHECK: !0 = !DISubrange(count: -1, lowerBound: 0) ; CHECK-NEXT: !1 = !DISubrange(count: -1, lowerBound: 4) ; CHECK-NEXT: !2 = !DISubrange(count: -1, lowerBound: -5) -!0 = !DISubrange(count: -1) +!0 = !DISubrange(count: -1, lowerBound: 0) !1 = !DISubrange(count: -1, lowerBound: 0) !2 = !DISubrange(count: -1, lowerBound: 4) diff --git a/llvm/test/Assembler/invalid-disubrange-count-missing.ll b/llvm/test/Assembler/invalid-disubrange-count-missing.ll index 8fc4487117f681..8b7bf713a8e916 100644 --- a/llvm/test/Assembler/invalid-disubrange-count-missing.ll +++ 
b/llvm/test/Assembler/invalid-disubrange-count-missing.ll @@ -1,4 +1,5 @@ ; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s -; CHECK: [[@LINE+1]]:32: error: missing required field 'count' +!named = !{!0} +; CHECK: Subrange must contain count or upperBound !0 = !DISubrange(lowerBound: -3) diff --git a/llvm/test/Bindings/llvm-c/debug_info.ll b/llvm/test/Bindings/llvm-c/debug_info.ll index 59d9628ff009db..d56873f1cb2511 100644 --- a/llvm/test/Bindings/llvm-c/debug_info.ll +++ b/llvm/test/Bindings/llvm-c/debug_info.ll @@ -60,7 +60,7 @@ ; CHECK-NEXT: !33 = !{!6, !6, !34} ; CHECK-NEXT: !34 = !DICompositeType(tag: DW_TAG_array_type, baseType: !6, size: 640, flags: DIFlagVector, elements: !35) ; CHECK-NEXT: !35 = !{!36} -; CHECK-NEXT: !36 = !DISubrange(count: 10) +; CHECK-NEXT: !36 = !DISubrange(count: 10, lowerBound: 0) ; CHECK-NEXT: !37 = !{!38, !39, !40, !41} ; CHECK-NEXT: !38 = !DILocalVariable(name: "a", arg: 1, scope: !31, file: !1, line: 42, type: !6) ; CHECK-NEXT: !39 = !DILocalVariable(name: "b", arg: 2, scope: !31, file: !1, line: 42, type: !6) diff --git a/llvm/test/Bitcode/fortranSubrange.ll b/llvm/test/Bitcode/fortranSubrange.ll new file mode 100644 index 00000000000000..7b97be5b352dcf --- /dev/null +++ b/llvm/test/Bitcode/fortranSubrange.ll @@ -0,0 +1,44 @@ +;; This test checks DISubrange bounds + +; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s + +;; Test whether bounds are generated correctly. 
+; CHECK: !{{[0-9]+}} = !DISubrange(lowerBound: 3, upperBound: ![[NODE:[0-9]+]], stride: !DIExpression(DW_OP_constu, 4)) +; CHECK: ![[NODE]] = distinct !DILocalVariable + + +; ModuleID = 'fortsubrange.ll' +source_filename = "fortsubrange.ll" + +define void @MAIN_() !dbg !5 { +L.entry: + %.Z0640_333 = alloca i32*, align 8 + %"arr$sd1_349" = alloca [16 x i64], align 8 + call void @llvm.dbg.declare(metadata i32** %.Z0640_333, metadata !8, metadata !DIExpression(DW_OP_deref)), !dbg !15 + call void @llvm.dbg.declare(metadata [16 x i64]* %"arr$sd1_349", metadata !13, metadata !DIExpression(DW_OP_plus_uconst, 120)), !dbg !15 + ret void, !dbg !16 +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.declare(metadata, metadata, metadata) + +!llvm.module.flags = !{!0, !1} +!llvm.dbg.cu = !{!2} + +!0 = !{i32 2, !"Dwarf Version", i32 4} +!1 = !{i32 2, !"Debug Info Version", i32 3} +!2 = distinct !DICompileUnit(language: DW_LANG_Fortran90, file: !3, producer: " F90 Flang - 1.5 2017-05-01", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !4, globals: !4, imports: !4) +!3 = !DIFile(filename: "fortsubrange.f90", directory: "/dir") +!4 = !{} +!5 = distinct !DISubprogram(name: "main", scope: !2, file: !3, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagMainSubprogram, unit: !2) +!6 = !DISubroutineType(cc: DW_CC_program, types: !7) +!7 = !{null} +!8 = !DILocalVariable(name: "arr", scope: !5, file: !3, type: !9) +!9 = !DICompositeType(tag: DW_TAG_array_type, baseType: !10, size: 32, align: 32, elements: !11) +!10 = !DIBasicType(name: "integer", size: 32, align: 32, encoding: DW_ATE_signed) +!11 = !{!12} +!12 = !DISubrange(lowerBound: 3, upperBound: !13, stride: !DIExpression(DW_OP_constu, 4)) +!13 = distinct !DILocalVariable(scope: !5, file: !3, type: !14, flags: DIFlagArtificial) +!14 = !DIBasicType(name: "integer*8", size: 64, align: 64, encoding: DW_ATE_signed) +!15 = 
!DILocation(line: 0, scope: !5) +!16 = !DILocation(line: 6, column: 1, scope: !5) diff --git a/llvm/test/Bitcode/fortranSubrangeBackward.ll b/llvm/test/Bitcode/fortranSubrangeBackward.ll new file mode 100644 index 00000000000000..ffa987e2f01edf --- /dev/null +++ b/llvm/test/Bitcode/fortranSubrangeBackward.ll @@ -0,0 +1,50 @@ +;; This test checks Backward compatibility of DISubrange bounds +; REQUIRES: x86_64-linux + +; RUN: llvm-dis -o - %s.bc | FileCheck %s + +;; Test whether bounds are generated correctly. +; CHECK: !DISubrange(count: 15, lowerBound: 3) +; CHECK: !DISubrange(count: ![[NODE:[0-9]+]], lowerBound: 3) +; CHECK: ![[NODE]] = distinct !DILocalVariable + + +; ModuleID = 'fortsubrange.ll' +source_filename = "fortsubrange.ll" + +define void @MAIN_() !dbg !10 { +L.entry: + %.Z0640_333 = alloca i32*, align 8 + %"arr$sd1_349" = alloca [16 x i64], align 8 + call void @llvm.dbg.declare(metadata i32** %.Z0640_333, metadata !13, metadata !DIExpression(DW_OP_deref)), !dbg !19 + call void @llvm.dbg.declare(metadata [16 x i64]* %"arr$sd1_349", metadata !17, metadata !DIExpression(DW_OP_plus_uconst, 120)), !dbg !19 + ret void, !dbg !20 +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.declare(metadata, metadata, metadata) + +!llvm.module.flags = !{!0, !1} +!llvm.dbg.cu = !{!2} + +!0 = !{i32 2, !"Dwarf Version", i32 4} +!1 = !{i32 2, !"Debug Info Version", i32 3} +!2 = distinct !DICompileUnit(language: DW_LANG_Fortran90, file: !3, producer: " F90 Flang - 1.5 2017-05-01", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !4, imports: !4) +!3 = !DIFile(filename: "fortsubrange.f90", directory: "/dir") +!4 = !{} +!5 = !{!6} +!6 = !DICompositeType(tag: DW_TAG_array_type, baseType: !7, size: 32, align: 32, elements: !8) +!7 = !DIBasicType(name: "integer", size: 32, align: 32, encoding: DW_ATE_signed) +!8 = !{!9} +!9 = !DISubrange(count: 15, lowerBound: 3) +!10 = distinct 
!DISubprogram(name: "main", scope: !2, file: !3, line: 1, type: !11, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagMainSubprogram, unit: !2) +!11 = !DISubroutineType(cc: DW_CC_program, types: !12) +!12 = !{null} +!13 = !DILocalVariable(name: "arr", scope: !10, file: !3, type: !14) +!14 = !DICompositeType(tag: DW_TAG_array_type, baseType: !7, size: 32, align: 32, elements: !15) +!15 = !{!16} +!16 = !DISubrange(count: !17, lowerBound: 3) +!17 = distinct !DILocalVariable(scope: !10, file: !3, type: !18, flags: DIFlagArtificial) +!18 = !DIBasicType(name: "integer*8", size: 64, align: 64, encoding: DW_ATE_signed) +!19 = !DILocation(line: 0, scope: !10) +!20 = !DILocation(line: 6, column: 1, scope: !10) diff --git a/llvm/test/Bitcode/fortranSubrangeBackward.ll.bc b/llvm/test/Bitcode/fortranSubrangeBackward.ll.bc new file mode 100644 index 00000000000000..00e427b1cdaa2a Binary files /dev/null and b/llvm/test/Bitcode/fortranSubrangeBackward.ll.bc differ diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-not-really-equiv-insts.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-not-really-equiv-insts.mir new file mode 100644 index 00000000000000..e387c5e58d6fbf --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-not-really-equiv-insts.mir @@ -0,0 +1,82 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s + +--- | + @g = external hidden unnamed_addr global i32, align 4 + define void @not_necessarily_equiv_loads() { ret void } + define void @invariant_loads() { ret void } + define void @both_have_to_be_invariant() { ret void } +... +--- +name: not_necessarily_equiv_loads +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + + ; %load1 || %load2 == %load1 is not necessarily true, even though they + ; both load from the same address. 
Whatever is in that address may be + ; changed by another instruction which appears between them. + ; + ; Check that we don't remove the G_OR. + + ; CHECK-LABEL: name: not_necessarily_equiv_loads + ; CHECK: %ptr:_(p0) = G_GLOBAL_VALUE @g + ; CHECK: %load1:_(s32) = G_LOAD %ptr(p0) :: (load 4 from @g) + ; CHECK: %load2:_(s32) = G_LOAD %ptr(p0) :: (load 4 from @g) + ; CHECK: %or:_(s32) = G_OR %load2, %load1 + ; CHECK: G_STORE %or(s32), %ptr(p0) :: (store 4 into @g) + ; CHECK: RET_ReallyLR + %ptr:_(p0) = G_GLOBAL_VALUE @g + %load1:_(s32) = G_LOAD %ptr(p0) :: (load 4 from @g) + %load2:_(s32) = G_LOAD %ptr(p0) :: (load 4 from @g) + %or:_(s32) = G_OR %load2, %load1 + G_STORE %or(s32), %ptr(p0) :: (store 4 into @g) + RET_ReallyLR + +... +--- +name: invariant_loads +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + + ; %load1 || %load2 == %load1 is fine here, because the loads are invariant. + + ; CHECK-LABEL: name: invariant_loads + ; CHECK: %ptr:_(p0) = G_GLOBAL_VALUE @g + ; CHECK: %load2:_(s32) = G_LOAD %ptr(p0) :: (dereferenceable invariant load 4 from @g) + ; CHECK: G_STORE %load2(s32), %ptr(p0) :: (store 4 into @g) + ; CHECK: RET_ReallyLR + %ptr:_(p0) = G_GLOBAL_VALUE @g + %load1:_(s32) = G_LOAD %ptr(p0) :: (dereferenceable invariant load 4 from @g) + %load2:_(s32) = G_LOAD %ptr(p0) :: (dereferenceable invariant load 4 from @g) + %or:_(s32) = G_OR %load2, %load1 + G_STORE %or(s32), %ptr(p0) :: (store 4 into @g) + RET_ReallyLR + +... +--- +name: both_have_to_be_invariant +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + + ; We shouldn't combine here, because the loads both have to be invariant. 
+ + ; CHECK-LABEL: name: both_have_to_be_invariant + ; CHECK: %ptr:_(p0) = G_GLOBAL_VALUE @g + ; CHECK: %load1:_(s32) = G_LOAD %ptr(p0) :: (dereferenceable invariant load 4 from @g) + ; CHECK: %load2:_(s32) = G_LOAD %ptr(p0) :: (dereferenceable load 4 from @g) + ; CHECK: %or:_(s32) = G_OR %load2, %load1 + ; CHECK: G_STORE %or(s32), %ptr(p0) :: (store 4 into @g) + ; CHECK: RET_ReallyLR + %ptr:_(p0) = G_GLOBAL_VALUE @g + %load1:_(s32) = G_LOAD %ptr(p0) :: (dereferenceable invariant load 4 from @g) + %load2:_(s32) = G_LOAD %ptr(p0) :: (dereferenceable load 4 from @g) + %or:_(s32) = G_OR %load2, %load1 + G_STORE %or(s32), %ptr(p0) :: (store 4 into @g) + RET_ReallyLR +... diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index eeee6d0f6049c2..401a0ac8df66b2 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -35,6 +35,7 @@ ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: Loop Pass Manager +; CHECK-NEXT: Canonicalize Freeze Instructions in Loops ; CHECK-NEXT: Induction Variable Users ; CHECK-NEXT: Loop Strength Reduction ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll index 0d4d2c7460071e..7820734e366d0b 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -207,6 +207,17 @@ define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) { ret <2 x double> %tmp4 } +define <2 x double> @ins1f2_args_flipped(<2 x double> %tmp2, <1 x double> %tmp1) { +; CHECK-LABEL: ins1f2_args_flipped: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: zip1 v0.2d, v0.2d, v1.2d +; CHECK-NEXT: ret + %tmp3 = extractelement <1 x double> %tmp1, i32 0 + %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1 + ret <2 x double> 
%tmp4 +} + define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) { ; CHECK-LABEL: ins16b8: ; CHECK: // %bb.0: diff --git a/llvm/test/CodeGen/AArch64/spillfill-sve.mir b/llvm/test/CodeGen/AArch64/spillfill-sve.mir index 21bdb45965bd90..982d232f12f497 100644 --- a/llvm/test/CodeGen/AArch64/spillfill-sve.mir +++ b/llvm/test/CodeGen/AArch64/spillfill-sve.mir @@ -8,6 +8,9 @@ define aarch64_sve_vector_pcs void @spills_fills_stack_id_ppr() #0 { entry: unreachable } define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr() #0 { entry: unreachable } + define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr2() #0 { entry: unreachable } + define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr3() #0 { entry: unreachable } + define aarch64_sve_vector_pcs void @spills_fills_stack_id_zpr4() #0 { entry: unreachable } attributes #0 = { nounwind "target-features"="+sve" } @@ -90,3 +93,120 @@ body: | $z0 = COPY %0 RET_ReallyLR ... +--- +name: spills_fills_stack_id_zpr2 +tracksRegLiveness: true +registers: + - { id: 0, class: zpr2 } +stack: +liveins: + - { reg: '$z0_z1', virtual-reg: '%0' } +body: | + bb.0.entry: + liveins: $z0_z1 + + ; CHECK-LABEL: name: spills_fills_stack_id_zpr2 + ; CHECK: stack: + ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 32, alignment: 16 + ; CHECK-NEXT: stack-id: sve-vec + + ; EXPAND-LABEL: name: spills_fills_stack_id_zpr2 + ; EXPAND: STR_ZXI $z0, $sp, 0 + ; EXPAND: STR_ZXI $z1, $sp, 1 + ; EXPAND: $z0 = LDR_ZXI $sp, 0 + ; EXPAND: $z1 = LDR_ZXI $sp, 1 + + %0:zpr2 = COPY $z0_z1 + + $z0_z1_z2_z3 = IMPLICIT_DEF + $z4_z5_z6_z7 = IMPLICIT_DEF + $z8_z9_z10_z11 = IMPLICIT_DEF + $z12_z13_z14_z15 = IMPLICIT_DEF + $z16_z17_z18_z19 = IMPLICIT_DEF + $z20_z21_z22_z23 = IMPLICIT_DEF + $z24_z25_z26_z27 = IMPLICIT_DEF + $z28_z29_z30_z31 = IMPLICIT_DEF + + $z0_z1 = COPY %0 + RET_ReallyLR +... 
+--- +name: spills_fills_stack_id_zpr3 +tracksRegLiveness: true +registers: + - { id: 0, class: zpr3 } +stack: +liveins: + - { reg: '$z0_z1_z2', virtual-reg: '%0' } +body: | + bb.0.entry: + liveins: $z0_z1_z2 + + ; CHECK-LABEL: name: spills_fills_stack_id_zpr3 + ; CHECK: stack: + ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 48, alignment: 16 + ; CHECK-NEXT: stack-id: sve-vec + + ; EXPAND-LABEL: name: spills_fills_stack_id_zpr3 + ; EXPAND: STR_ZXI $z0, $sp, 0 + ; EXPAND: STR_ZXI $z1, $sp, 1 + ; EXPAND: STR_ZXI $z2, $sp, 2 + ; EXPAND: $z0 = LDR_ZXI $sp, 0 + ; EXPAND: $z1 = LDR_ZXI $sp, 1 + ; EXPAND: $z2 = LDR_ZXI $sp, 2 + + %0:zpr3 = COPY $z0_z1_z2 + + $z0_z1_z2_z3 = IMPLICIT_DEF + $z4_z5_z6_z7 = IMPLICIT_DEF + $z8_z9_z10_z11 = IMPLICIT_DEF + $z12_z13_z14_z15 = IMPLICIT_DEF + $z16_z17_z18_z19 = IMPLICIT_DEF + $z20_z21_z22_z23 = IMPLICIT_DEF + $z24_z25_z26_z27 = IMPLICIT_DEF + $z28_z29_z30_z31 = IMPLICIT_DEF + + $z0_z1_z2 = COPY %0 + RET_ReallyLR +... +--- +name: spills_fills_stack_id_zpr4 +tracksRegLiveness: true +registers: + - { id: 0, class: zpr4 } +stack: +liveins: + - { reg: '$z0_z1_z2_z3', virtual-reg: '%0' } +body: | + bb.0.entry: + liveins: $z0_z1_z2_z3 + + ; CHECK-LABEL: name: spills_fills_stack_id_zpr4 + ; CHECK: stack: + ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 64, alignment: 16 + ; CHECK-NEXT: stack-id: sve-vec + + ; EXPAND-LABEL: name: spills_fills_stack_id_zpr4 + ; EXPAND: STR_ZXI $z0, $sp, 0 + ; EXPAND: STR_ZXI $z1, $sp, 1 + ; EXPAND: STR_ZXI $z2, $sp, 2 + ; EXPAND: STR_ZXI $z3, $sp, 3 + ; EXPAND: $z0 = LDR_ZXI $sp, 0 + ; EXPAND: $z1 = LDR_ZXI $sp, 1 + ; EXPAND: $z2 = LDR_ZXI $sp, 2 + ; EXPAND: $z3 = LDR_ZXI $sp, 3 + + %0:zpr4 = COPY $z0_z1_z2_z3 + + $z0_z1_z2_z3 = IMPLICIT_DEF + $z4_z5_z6_z7 = IMPLICIT_DEF + $z8_z9_z10_z11 = IMPLICIT_DEF + $z12_z13_z14_z15 = IMPLICIT_DEF + $z16_z17_z18_z19 = IMPLICIT_DEF + $z20_z21_z22_z23 = IMPLICIT_DEF + $z24_z25_z26_z27 = IMPLICIT_DEF + $z28_z29_z30_z31 = IMPLICIT_DEF + + 
$z0_z1_z2_z3 = COPY %0 + RET_ReallyLR +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll index 5a7b4b390b5dd2..443944408f339b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll @@ -1517,3 +1517,639 @@ entry: %ext = extractelement <16 x double> , i32 %sel ret double %ext } + +define amdgpu_ps float @dyn_extract_v6f32_s_v(<6 x float> inreg %vec, i32 %sel) { +; GPRIDX-LABEL: dyn_extract_v6f32_s_v: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b64 s[6:7], exec +; GPRIDX-NEXT: BB33_1: ; =>This Inner Loop Header: Depth=1 +; GPRIDX-NEXT: v_readfirstlane_b32 s8, v0 +; GPRIDX-NEXT: s_mov_b32 m0, s8 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s8, v0 +; GPRIDX-NEXT: s_movrels_b32 s8, s0 +; GPRIDX-NEXT: v_mov_b32_e32 v1, s8 +; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc +; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc +; GPRIDX-NEXT: s_cbranch_execnz BB33_1 +; GPRIDX-NEXT: ; %bb.2: +; GPRIDX-NEXT: s_mov_b64 exec, s[6:7] +; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v6f32_s_v: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b64 s[6:7], exec +; MOVREL-NEXT: BB33_1: ; =>This Inner Loop Header: Depth=1 +; MOVREL-NEXT: v_readfirstlane_b32 s8, v0 +; MOVREL-NEXT: s_mov_b32 m0, s8 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s8, v0 +; MOVREL-NEXT: s_movrels_b32 s8, s0 +; MOVREL-NEXT: v_mov_b32_e32 v1, s8 +; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc +; MOVREL-NEXT: s_xor_b64 exec, exec, vcc +; 
MOVREL-NEXT: s_cbranch_execnz BB33_1 +; MOVREL-NEXT: ; %bb.2: +; MOVREL-NEXT: s_mov_b64 exec, s[6:7] +; MOVREL-NEXT: v_mov_b32_e32 v0, v1 +; MOVREL-NEXT: ; return to shader part epilog +entry: + %ext = extractelement <6 x float> %vec, i32 %sel + ret float %ext +} + +define float @dyn_extract_v6f32_v_v(<6 x float> %vec, i32 %sel) { +; GPRIDX-LABEL: dyn_extract_v6f32_v_v: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GPRIDX-NEXT: s_mov_b64 s[4:5], exec +; GPRIDX-NEXT: BB34_1: ; =>This Inner Loop Header: Depth=1 +; GPRIDX-NEXT: v_readfirstlane_b32 s6, v6 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v6 +; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v7, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc +; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc +; GPRIDX-NEXT: s_cbranch_execnz BB34_1 +; GPRIDX-NEXT: ; %bb.2: +; GPRIDX-NEXT: s_mov_b64 exec, s[4:5] +; GPRIDX-NEXT: v_mov_b32_e32 v0, v7 +; GPRIDX-NEXT: s_setpc_b64 s[30:31] +; +; MOVREL-LABEL: dyn_extract_v6f32_v_v: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; MOVREL-NEXT: s_mov_b64 s[4:5], exec +; MOVREL-NEXT: BB34_1: ; =>This Inner Loop Header: Depth=1 +; MOVREL-NEXT: v_readfirstlane_b32 s6, v6 +; MOVREL-NEXT: s_mov_b32 m0, s6 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v6 +; MOVREL-NEXT: v_movrels_b32_e32 v7, v0 +; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc +; MOVREL-NEXT: s_xor_b64 exec, exec, vcc +; MOVREL-NEXT: s_cbranch_execnz BB34_1 +; MOVREL-NEXT: ; %bb.2: +; MOVREL-NEXT: s_mov_b64 exec, s[4:5] +; MOVREL-NEXT: v_mov_b32_e32 v0, v7 +; MOVREL-NEXT: s_setpc_b64 s[30:31] +entry: + %ext = extractelement <6 x float> %vec, i32 %sel + ret float %ext +} + +define amdgpu_ps float @dyn_extract_v6f32_v_s(<6 x float> %vec, i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v6f32_v_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 
v0, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v6f32_v_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 m0, s2 +; MOVREL-NEXT: v_movrels_b32_e32 v0, v0 +; MOVREL-NEXT: ; return to shader part epilog +entry: + %ext = extractelement <6 x float> %vec, i32 %sel + ret float %ext +} + +define amdgpu_ps float @dyn_extract_v6f32_s_s(<6 x float> inreg %vec, i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v6f32_s_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 m0, s8 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_movrels_b32 s0, s0 +; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v6f32_s_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 m0, s8 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_movrels_b32 s0, s0 +; MOVREL-NEXT: v_mov_b32_e32 v0, s0 +; MOVREL-NEXT: ; return to shader part epilog +entry: + %ext = extractelement <6 x float> %vec, i32 %sel + ret float %ext +} + +define amdgpu_ps float @dyn_extract_v7f32_s_v(<7 x float> inreg %vec, i32 %sel) { +; GPRIDX-LABEL: dyn_extract_v7f32_s_v: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b64 s[8:9], exec +; GPRIDX-NEXT: BB37_1: ; =>This Inner Loop Header: Depth=1 +; GPRIDX-NEXT: v_readfirstlane_b32 s7, v0 +; GPRIDX-NEXT: s_mov_b32 m0, s7 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s7, v0 +; GPRIDX-NEXT: 
s_movrels_b32 s7, s0 +; GPRIDX-NEXT: v_mov_b32_e32 v1, s7 +; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc +; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc +; GPRIDX-NEXT: s_cbranch_execnz BB37_1 +; GPRIDX-NEXT: ; %bb.2: +; GPRIDX-NEXT: s_mov_b64 exec, s[8:9] +; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v7f32_s_v: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b64 s[8:9], exec +; MOVREL-NEXT: BB37_1: ; =>This Inner Loop Header: Depth=1 +; MOVREL-NEXT: v_readfirstlane_b32 s7, v0 +; MOVREL-NEXT: s_mov_b32 m0, s7 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s7, v0 +; MOVREL-NEXT: s_movrels_b32 s7, s0 +; MOVREL-NEXT: v_mov_b32_e32 v1, s7 +; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc +; MOVREL-NEXT: s_xor_b64 exec, exec, vcc +; MOVREL-NEXT: s_cbranch_execnz BB37_1 +; MOVREL-NEXT: ; %bb.2: +; MOVREL-NEXT: s_mov_b64 exec, s[8:9] +; MOVREL-NEXT: v_mov_b32_e32 v0, v1 +; MOVREL-NEXT: ; return to shader part epilog +entry: + %ext = extractelement <7 x float> %vec, i32 %sel + ret float %ext +} + +define float @dyn_extract_v7f32_v_v(<7 x float> %vec, i32 %sel) { +; GPRIDX-LABEL: dyn_extract_v7f32_v_v: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GPRIDX-NEXT: s_mov_b64 s[4:5], exec +; GPRIDX-NEXT: BB38_1: ; =>This Inner Loop Header: Depth=1 +; GPRIDX-NEXT: v_readfirstlane_b32 s6, v7 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v7 +; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v8, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc +; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc +; GPRIDX-NEXT: s_cbranch_execnz BB38_1 +; GPRIDX-NEXT: ; %bb.2: +; GPRIDX-NEXT: s_mov_b64 exec, s[4:5] +; GPRIDX-NEXT: 
v_mov_b32_e32 v0, v8 +; GPRIDX-NEXT: s_setpc_b64 s[30:31] +; +; MOVREL-LABEL: dyn_extract_v7f32_v_v: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; MOVREL-NEXT: s_mov_b64 s[4:5], exec +; MOVREL-NEXT: BB38_1: ; =>This Inner Loop Header: Depth=1 +; MOVREL-NEXT: v_readfirstlane_b32 s6, v7 +; MOVREL-NEXT: s_mov_b32 m0, s6 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v7 +; MOVREL-NEXT: v_movrels_b32_e32 v8, v0 +; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc +; MOVREL-NEXT: s_xor_b64 exec, exec, vcc +; MOVREL-NEXT: s_cbranch_execnz BB38_1 +; MOVREL-NEXT: ; %bb.2: +; MOVREL-NEXT: s_mov_b64 exec, s[4:5] +; MOVREL-NEXT: v_mov_b32_e32 v0, v8 +; MOVREL-NEXT: s_setpc_b64 s[30:31] +entry: + %ext = extractelement <7 x float> %vec, i32 %sel + ret float %ext +} + +define amdgpu_ps float @dyn_extract_v7f32_v_s(<7 x float> %vec, i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v7f32_v_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v0, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v7f32_v_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 m0, s2 +; MOVREL-NEXT: v_movrels_b32_e32 v0, v0 +; MOVREL-NEXT: ; return to shader part epilog +entry: + %ext = extractelement <7 x float> %vec, i32 %sel + ret float %ext +} + +define amdgpu_ps float @dyn_extract_v7f32_s_s(<7 x float> inreg %vec, i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v7f32_s_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 m0, s9 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_movrels_b32 s0, s0 +; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v7f32_s_s: +; MOVREL: ; %bb.0: ; 
%entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 m0, s9 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_movrels_b32 s0, s0 +; MOVREL-NEXT: v_mov_b32_e32 v0, s0 +; MOVREL-NEXT: ; return to shader part epilog +entry: + %ext = extractelement <7 x float> %vec, i32 %sel + ret float %ext +} + +define amdgpu_ps double @dyn_extract_v6f64_s_v(<6 x double> inreg %vec, i32 %sel) { +; GPRIDX-LABEL: dyn_extract_v6f64_s_v: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s16, s2 +; GPRIDX-NEXT: s_mov_b32 s17, s3 +; GPRIDX-NEXT: s_mov_b32 s18, s4 +; GPRIDX-NEXT: s_mov_b32 s19, s5 +; GPRIDX-NEXT: s_mov_b32 s20, s6 +; GPRIDX-NEXT: s_mov_b32 s21, s7 +; GPRIDX-NEXT: s_mov_b32 s22, s8 +; GPRIDX-NEXT: s_mov_b32 s23, s9 +; GPRIDX-NEXT: s_mov_b32 s24, s10 +; GPRIDX-NEXT: s_mov_b32 s25, s11 +; GPRIDX-NEXT: s_mov_b32 s26, s12 +; GPRIDX-NEXT: s_mov_b32 s27, s13 +; GPRIDX-NEXT: s_mov_b64 s[2:3], exec +; GPRIDX-NEXT: BB41_1: ; =>This Inner Loop Header: Depth=1 +; GPRIDX-NEXT: v_readfirstlane_b32 s0, v0 +; GPRIDX-NEXT: s_lshl_b32 m0, s0, 1 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s0, v0 +; GPRIDX-NEXT: s_movrels_b32 s0, s16 +; GPRIDX-NEXT: s_movrels_b32 s1, s17 +; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc +; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc +; GPRIDX-NEXT: s_cbranch_execnz BB41_1 +; GPRIDX-NEXT: ; %bb.2: +; GPRIDX-NEXT: s_mov_b64 exec, s[2:3] +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v6f64_s_v: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s16, s2 +; MOVREL-NEXT: s_mov_b32 s17, s3 +; MOVREL-NEXT: s_mov_b32 s18, s4 +; MOVREL-NEXT: s_mov_b32 s19, s5 +; MOVREL-NEXT: s_mov_b32 s20, s6 +; MOVREL-NEXT: s_mov_b32 s21, s7 +; MOVREL-NEXT: s_mov_b32 s22, s8 +; MOVREL-NEXT: s_mov_b32 s23, s9 +; MOVREL-NEXT: s_mov_b32 s24, s10 +; MOVREL-NEXT: s_mov_b32 s25, s11 +; 
MOVREL-NEXT: s_mov_b32 s26, s12 +; MOVREL-NEXT: s_mov_b32 s27, s13 +; MOVREL-NEXT: s_mov_b64 s[2:3], exec +; MOVREL-NEXT: BB41_1: ; =>This Inner Loop Header: Depth=1 +; MOVREL-NEXT: v_readfirstlane_b32 s0, v0 +; MOVREL-NEXT: s_lshl_b32 m0, s0, 1 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s0, v0 +; MOVREL-NEXT: s_movrels_b32 s0, s16 +; MOVREL-NEXT: s_movrels_b32 s1, s17 +; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc +; MOVREL-NEXT: s_xor_b64 exec, exec, vcc +; MOVREL-NEXT: s_cbranch_execnz BB41_1 +; MOVREL-NEXT: ; %bb.2: +; MOVREL-NEXT: s_mov_b64 exec, s[2:3] +; MOVREL-NEXT: ; return to shader part epilog +entry: + %ext = extractelement <6 x double> %vec, i32 %sel + ret double %ext +} + +define double @dyn_extract_v6f64_v_v(<6 x double> %vec, i32 %sel) { +; GPRIDX-LABEL: dyn_extract_v6f64_v_v: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GPRIDX-NEXT: s_mov_b64 s[4:5], exec +; GPRIDX-NEXT: BB42_1: ; =>This Inner Loop Header: Depth=1 +; GPRIDX-NEXT: v_readfirstlane_b32 s6, v12 +; GPRIDX-NEXT: s_lshl_b32 s7, s6, 1 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v12 +; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v13, v0 +; GPRIDX-NEXT: v_mov_b32_e32 v14, v1 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc +; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc +; GPRIDX-NEXT: s_cbranch_execnz BB42_1 +; GPRIDX-NEXT: ; %bb.2: +; GPRIDX-NEXT: s_mov_b64 exec, s[4:5] +; GPRIDX-NEXT: v_mov_b32_e32 v0, v13 +; GPRIDX-NEXT: v_mov_b32_e32 v1, v14 +; GPRIDX-NEXT: s_setpc_b64 s[30:31] +; +; MOVREL-LABEL: dyn_extract_v6f64_v_v: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; MOVREL-NEXT: s_mov_b64 s[4:5], exec +; MOVREL-NEXT: BB42_1: ; =>This Inner Loop Header: Depth=1 +; MOVREL-NEXT: v_readfirstlane_b32 s6, v12 +; MOVREL-NEXT: s_lshl_b32 m0, s6, 1 +; MOVREL-NEXT: v_movrels_b32_e32 v13, v0 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v12 +; MOVREL-NEXT: 
v_movrels_b32_e32 v14, v1 +; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc +; MOVREL-NEXT: s_xor_b64 exec, exec, vcc +; MOVREL-NEXT: s_cbranch_execnz BB42_1 +; MOVREL-NEXT: ; %bb.2: +; MOVREL-NEXT: s_mov_b64 exec, s[4:5] +; MOVREL-NEXT: v_mov_b32_e32 v0, v13 +; MOVREL-NEXT: v_mov_b32_e32 v1, v14 +; MOVREL-NEXT: s_setpc_b64 s[30:31] +entry: + %ext = extractelement <6 x double> %vec, i32 %sel + ret double %ext +} + +define amdgpu_ps double @dyn_extract_v6f64_v_s(<6 x double> %vec, i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v6f64_v_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 +; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v12, v0 +; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: v_readfirstlane_b32 s0, v12 +; GPRIDX-NEXT: v_readfirstlane_b32 s1, v0 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v6f64_v_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_lshl_b32 m0, s2, 1 +; MOVREL-NEXT: v_movrels_b32_e32 v12, v0 +; MOVREL-NEXT: v_movrels_b32_e32 v0, v1 +; MOVREL-NEXT: v_readfirstlane_b32 s0, v12 +; MOVREL-NEXT: v_readfirstlane_b32 s1, v0 +; MOVREL-NEXT: ; return to shader part epilog +entry: + %ext = extractelement <6 x double> %vec, i32 %sel + ret double %ext +} + +define amdgpu_ps double @dyn_extract_v6f64_s_s(<6 x double> inreg %vec, i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v6f64_s_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 m0, s14 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 s10, s12 +; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GPRIDX-NEXT: ; return to shader part epilog +; +; 
MOVREL-LABEL: dyn_extract_v6f64_s_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 m0, s14 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 s10, s12 +; MOVREL-NEXT: s_mov_b32 s11, s13 +; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] +; MOVREL-NEXT: ; return to shader part epilog +entry: + %ext = extractelement <6 x double> %vec, i32 %sel + ret double %ext +} + +define amdgpu_ps double @dyn_extract_v7f64_s_v(<7 x double> inreg %vec, i32 %sel) { +; GPRIDX-LABEL: dyn_extract_v7f64_s_v: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s16, s2 +; GPRIDX-NEXT: s_mov_b32 s17, s3 +; GPRIDX-NEXT: s_mov_b32 s18, s4 +; GPRIDX-NEXT: s_mov_b32 s19, s5 +; GPRIDX-NEXT: s_mov_b32 s20, s6 +; GPRIDX-NEXT: s_mov_b32 s21, s7 +; GPRIDX-NEXT: s_mov_b32 s22, s8 +; GPRIDX-NEXT: s_mov_b32 s23, s9 +; GPRIDX-NEXT: s_mov_b32 s24, s10 +; GPRIDX-NEXT: s_mov_b32 s25, s11 +; GPRIDX-NEXT: s_mov_b32 s26, s12 +; GPRIDX-NEXT: s_mov_b32 s27, s13 +; GPRIDX-NEXT: s_mov_b32 s28, s14 +; GPRIDX-NEXT: s_mov_b32 s29, s15 +; GPRIDX-NEXT: s_mov_b64 s[2:3], exec +; GPRIDX-NEXT: BB45_1: ; =>This Inner Loop Header: Depth=1 +; GPRIDX-NEXT: v_readfirstlane_b32 s0, v0 +; GPRIDX-NEXT: s_lshl_b32 m0, s0, 1 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s0, v0 +; GPRIDX-NEXT: s_movrels_b32 s0, s16 +; GPRIDX-NEXT: s_movrels_b32 s1, s17 +; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc +; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc +; GPRIDX-NEXT: s_cbranch_execnz BB45_1 +; GPRIDX-NEXT: ; %bb.2: +; GPRIDX-NEXT: s_mov_b64 exec, s[2:3] +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v7f64_s_v: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s16, s2 +; MOVREL-NEXT: s_mov_b32 s17, s3 +; 
MOVREL-NEXT: s_mov_b32 s18, s4 +; MOVREL-NEXT: s_mov_b32 s19, s5 +; MOVREL-NEXT: s_mov_b32 s20, s6 +; MOVREL-NEXT: s_mov_b32 s21, s7 +; MOVREL-NEXT: s_mov_b32 s22, s8 +; MOVREL-NEXT: s_mov_b32 s23, s9 +; MOVREL-NEXT: s_mov_b32 s24, s10 +; MOVREL-NEXT: s_mov_b32 s25, s11 +; MOVREL-NEXT: s_mov_b32 s26, s12 +; MOVREL-NEXT: s_mov_b32 s27, s13 +; MOVREL-NEXT: s_mov_b32 s28, s14 +; MOVREL-NEXT: s_mov_b32 s29, s15 +; MOVREL-NEXT: s_mov_b64 s[2:3], exec +; MOVREL-NEXT: BB45_1: ; =>This Inner Loop Header: Depth=1 +; MOVREL-NEXT: v_readfirstlane_b32 s0, v0 +; MOVREL-NEXT: s_lshl_b32 m0, s0, 1 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s0, v0 +; MOVREL-NEXT: s_movrels_b32 s0, s16 +; MOVREL-NEXT: s_movrels_b32 s1, s17 +; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc +; MOVREL-NEXT: s_xor_b64 exec, exec, vcc +; MOVREL-NEXT: s_cbranch_execnz BB45_1 +; MOVREL-NEXT: ; %bb.2: +; MOVREL-NEXT: s_mov_b64 exec, s[2:3] +; MOVREL-NEXT: ; return to shader part epilog +entry: + %ext = extractelement <7 x double> %vec, i32 %sel + ret double %ext +} + +define double @dyn_extract_v7f64_v_v(<7 x double> %vec, i32 %sel) { +; GPRIDX-LABEL: dyn_extract_v7f64_v_v: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GPRIDX-NEXT: s_mov_b64 s[4:5], exec +; GPRIDX-NEXT: BB46_1: ; =>This Inner Loop Header: Depth=1 +; GPRIDX-NEXT: v_readfirstlane_b32 s6, v14 +; GPRIDX-NEXT: s_lshl_b32 s7, s6, 1 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v14 +; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v15, v0 +; GPRIDX-NEXT: v_mov_b32_e32 v16, v1 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc +; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc +; GPRIDX-NEXT: s_cbranch_execnz BB46_1 +; GPRIDX-NEXT: ; %bb.2: +; GPRIDX-NEXT: s_mov_b64 exec, s[4:5] +; GPRIDX-NEXT: v_mov_b32_e32 v0, v15 +; GPRIDX-NEXT: v_mov_b32_e32 v1, v16 +; GPRIDX-NEXT: s_setpc_b64 s[30:31] +; +; MOVREL-LABEL: dyn_extract_v7f64_v_v: +; MOVREL: ; %bb.0: ; %entry +; 
MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; MOVREL-NEXT: s_mov_b64 s[4:5], exec +; MOVREL-NEXT: BB46_1: ; =>This Inner Loop Header: Depth=1 +; MOVREL-NEXT: v_readfirstlane_b32 s6, v14 +; MOVREL-NEXT: s_lshl_b32 m0, s6, 1 +; MOVREL-NEXT: v_movrels_b32_e32 v15, v0 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v14 +; MOVREL-NEXT: v_movrels_b32_e32 v16, v1 +; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc +; MOVREL-NEXT: s_xor_b64 exec, exec, vcc +; MOVREL-NEXT: s_cbranch_execnz BB46_1 +; MOVREL-NEXT: ; %bb.2: +; MOVREL-NEXT: s_mov_b64 exec, s[4:5] +; MOVREL-NEXT: v_mov_b32_e32 v0, v15 +; MOVREL-NEXT: v_mov_b32_e32 v1, v16 +; MOVREL-NEXT: s_setpc_b64 s[30:31] +entry: + %ext = extractelement <7 x double> %vec, i32 %sel + ret double %ext +} + +define amdgpu_ps double @dyn_extract_v7f64_v_s(<7 x double> %vec, i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v7f64_v_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 +; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v14, v0 +; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: v_readfirstlane_b32 s0, v14 +; GPRIDX-NEXT: v_readfirstlane_b32 s1, v0 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v7f64_v_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_lshl_b32 m0, s2, 1 +; MOVREL-NEXT: v_movrels_b32_e32 v14, v0 +; MOVREL-NEXT: v_movrels_b32_e32 v0, v1 +; MOVREL-NEXT: v_readfirstlane_b32 s0, v14 +; MOVREL-NEXT: v_readfirstlane_b32 s1, v0 +; MOVREL-NEXT: ; return to shader part epilog +entry: + %ext = extractelement <7 x double> %vec, i32 %sel + ret double %ext +} + +define amdgpu_ps double @dyn_extract_v7f64_s_s(<7 x double> inreg %vec, i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v7f64_s_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 m0, s16 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, 
s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 s10, s12 +; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_mov_b32 s12, s14 +; GPRIDX-NEXT: s_mov_b32 s13, s15 +; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v7f64_s_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 m0, s16 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 s10, s12 +; MOVREL-NEXT: s_mov_b32 s11, s13 +; MOVREL-NEXT: s_mov_b32 s12, s14 +; MOVREL-NEXT: s_mov_b32 s13, s15 +; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] +; MOVREL-NEXT: ; return to shader part epilog +entry: + %ext = extractelement <7 x double> %vec, i32 %sel + ret double %ext +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll index 5fb0ef97932f3d..0d4e4a9a068956 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll @@ -3397,3 +3397,1188 @@ entry: %insert = insertelement <16 x double> %vec, double %val, i32 %idx ret <16 x double> %insert } + +define amdgpu_ps <7 x i32> @dyn_insertelement_v7i32_s_s_s(<7 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) { +; GPRIDX-LABEL: dyn_insertelement_v7i32_s_s_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; 
GPRIDX-NEXT: s_mov_b32 m0, s10 +; GPRIDX-NEXT: s_nop 0 +; GPRIDX-NEXT: s_movreld_b32 s0, s9 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_insertelement_v7i32_s_s_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 m0, s10 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_movreld_b32 s0, s9 +; MOVREL-NEXT: ; implicit-def: $vcc_hi +; MOVREL-NEXT: ; return to shader part epilog +entry: + %insert = insertelement <7 x i32> %vec, i32 %val, i32 %idx + ret <7 x i32> %insert +} + +define amdgpu_ps <7 x i8 addrspace(3)*> @dyn_insertelement_v7p3i8_s_s_s(<7 x i8 addrspace(3)*> inreg %vec, i8 addrspace(3)* inreg %val, i32 inreg %idx) { +; GPRIDX-LABEL: dyn_insertelement_v7p3i8_s_s_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 m0, s10 +; GPRIDX-NEXT: s_nop 0 +; GPRIDX-NEXT: s_movreld_b32 s0, s9 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_insertelement_v7p3i8_s_s_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 m0, s10 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_movreld_b32 s0, s9 +; MOVREL-NEXT: ; implicit-def: $vcc_hi +; MOVREL-NEXT: ; return to shader part epilog +entry: + %insert = insertelement <7 x i8 addrspace(3)*> %vec, i8 addrspace(3)* %val, i32 %idx + ret <7 x i8 addrspace(3)*> %insert +} + +define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_s_v_s(<7 x float> inreg 
%vec, float %val, i32 inreg %idx) { +; GPRIDX-LABEL: dyn_insertelement_v7f32_s_v_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: v_mov_b32_e32 v8, v0 +; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 +; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 +; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 +; GPRIDX-NEXT: v_mov_b32_e32 v3, s3 +; GPRIDX-NEXT: v_mov_b32_e32 v4, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v5, s5 +; GPRIDX-NEXT: v_mov_b32_e32 v6, s6 +; GPRIDX-NEXT: v_mov_b32_e32 v7, s7 +; GPRIDX-NEXT: s_set_gpr_idx_on s9, gpr_idx(DST) +; GPRIDX-NEXT: v_mov_b32_e32 v0, v8 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_insertelement_v7f32_s_v_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: v_mov_b32_e32 v8, v0 +; MOVREL-NEXT: v_mov_b32_e32 v0, s0 +; MOVREL-NEXT: s_mov_b32 m0, s9 +; MOVREL-NEXT: v_mov_b32_e32 v1, s1 +; MOVREL-NEXT: v_mov_b32_e32 v2, s2 +; MOVREL-NEXT: v_mov_b32_e32 v3, s3 +; MOVREL-NEXT: v_mov_b32_e32 v4, s4 +; MOVREL-NEXT: v_mov_b32_e32 v5, s5 +; MOVREL-NEXT: v_mov_b32_e32 v6, s6 +; MOVREL-NEXT: v_mov_b32_e32 v7, s7 +; MOVREL-NEXT: v_movreld_b32_e32 v0, v8 +; MOVREL-NEXT: ; implicit-def: $vcc_hi +; MOVREL-NEXT: ; return to shader part epilog +entry: + %insert = insertelement <7 x float> %vec, float %val, i32 %idx + ret <7 x float> %insert +} + +define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_s_v_v(<7 x float> inreg %vec, float %val, i32 %idx) { +; GPRIDX-LABEL: dyn_insertelement_v7f32_s_v_v: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 
s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: v_mov_b32_e32 v17, s7 +; GPRIDX-NEXT: v_mov_b32_e32 v8, v0 +; GPRIDX-NEXT: v_mov_b32_e32 v9, v1 +; GPRIDX-NEXT: v_mov_b32_e32 v16, s6 +; GPRIDX-NEXT: v_mov_b32_e32 v15, s5 +; GPRIDX-NEXT: v_mov_b32_e32 v14, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v13, s3 +; GPRIDX-NEXT: v_mov_b32_e32 v12, s2 +; GPRIDX-NEXT: v_mov_b32_e32 v11, s1 +; GPRIDX-NEXT: v_mov_b32_e32 v10, s0 +; GPRIDX-NEXT: s_mov_b64 s[0:1], exec +; GPRIDX-NEXT: BB46_1: ; =>This Inner Loop Header: Depth=1 +; GPRIDX-NEXT: v_readfirstlane_b32 s2, v9 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v9 +; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST) +; GPRIDX-NEXT: v_mov_b32_e32 v0, v10 +; GPRIDX-NEXT: v_mov_b32_e32 v1, v11 +; GPRIDX-NEXT: v_mov_b32_e32 v2, v12 +; GPRIDX-NEXT: v_mov_b32_e32 v3, v13 +; GPRIDX-NEXT: v_mov_b32_e32 v4, v14 +; GPRIDX-NEXT: v_mov_b32_e32 v5, v15 +; GPRIDX-NEXT: v_mov_b32_e32 v6, v16 +; GPRIDX-NEXT: v_mov_b32_e32 v7, v17 +; GPRIDX-NEXT: v_mov_b32_e32 v0, v8 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc +; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc +; GPRIDX-NEXT: s_cbranch_execnz BB46_1 +; GPRIDX-NEXT: ; %bb.2: +; GPRIDX-NEXT: s_mov_b64 exec, s[0:1] +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_insertelement_v7f32_s_v_v: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: v_mov_b32_e32 v17, s7 +; MOVREL-NEXT: v_mov_b32_e32 v13, s3 +; MOVREL-NEXT: v_mov_b32_e32 v14, s4 +; MOVREL-NEXT: v_mov_b32_e32 v15, s5 +; MOVREL-NEXT: v_mov_b32_e32 v16, s6 +; MOVREL-NEXT: v_mov_b32_e32 v12, s2 +; MOVREL-NEXT: v_mov_b32_e32 v11, s1 +; MOVREL-NEXT: 
v_mov_b32_e32 v10, s0 +; MOVREL-NEXT: s_mov_b32 s0, exec_lo +; MOVREL-NEXT: ; implicit-def: $vcc_hi +; MOVREL-NEXT: BB46_1: ; =>This Inner Loop Header: Depth=1 +; MOVREL-NEXT: v_readfirstlane_b32 s1, v1 +; MOVREL-NEXT: v_mov_b32_e32 v2, v10 +; MOVREL-NEXT: v_mov_b32_e32 v3, v11 +; MOVREL-NEXT: v_mov_b32_e32 v4, v12 +; MOVREL-NEXT: v_mov_b32_e32 v5, v13 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v1 +; MOVREL-NEXT: s_mov_b32 m0, s1 +; MOVREL-NEXT: v_mov_b32_e32 v6, v14 +; MOVREL-NEXT: v_mov_b32_e32 v7, v15 +; MOVREL-NEXT: v_mov_b32_e32 v8, v16 +; MOVREL-NEXT: v_mov_b32_e32 v9, v17 +; MOVREL-NEXT: v_movreld_b32_e32 v2, v0 +; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo +; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo +; MOVREL-NEXT: s_cbranch_execnz BB46_1 +; MOVREL-NEXT: ; %bb.2: +; MOVREL-NEXT: s_mov_b32 exec_lo, s0 +; MOVREL-NEXT: v_mov_b32_e32 v0, v2 +; MOVREL-NEXT: v_mov_b32_e32 v1, v3 +; MOVREL-NEXT: v_mov_b32_e32 v2, v4 +; MOVREL-NEXT: v_mov_b32_e32 v3, v5 +; MOVREL-NEXT: v_mov_b32_e32 v4, v6 +; MOVREL-NEXT: v_mov_b32_e32 v5, v7 +; MOVREL-NEXT: v_mov_b32_e32 v6, v8 +; MOVREL-NEXT: ; return to shader part epilog +entry: + %insert = insertelement <7 x float> %vec, float %val, i32 %idx + ret <7 x float> %insert +} + +define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_v_v_s(<7 x float> %vec, float %val, i32 inreg %idx) { +; GPRIDX-LABEL: dyn_insertelement_v7f32_v_v_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST) +; GPRIDX-NEXT: v_mov_b32_e32 v0, v7 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_insertelement_v7f32_v_v_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 m0, s2 +; MOVREL-NEXT: ; implicit-def: $vcc_hi +; MOVREL-NEXT: v_movreld_b32_e32 v0, v7 +; MOVREL-NEXT: ; return to shader part epilog +entry: + %insert = insertelement <7 x float> %vec, float %val, i32 %idx + ret <7 x float> %insert +} + +define amdgpu_ps <7 x float> 
@dyn_insertelement_v7f32_v_v_v(<7 x float> %vec, float %val, i32 %idx) { +; GPRIDX-LABEL: dyn_insertelement_v7f32_v_v_v: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b64 s[0:1], exec +; GPRIDX-NEXT: BB48_1: ; =>This Inner Loop Header: Depth=1 +; GPRIDX-NEXT: v_readfirstlane_b32 s2, v8 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v8 +; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(DST) +; GPRIDX-NEXT: v_mov_b32_e32 v16, v7 +; GPRIDX-NEXT: v_mov_b32_e32 v15, v6 +; GPRIDX-NEXT: v_mov_b32_e32 v14, v5 +; GPRIDX-NEXT: v_mov_b32_e32 v13, v4 +; GPRIDX-NEXT: v_mov_b32_e32 v12, v3 +; GPRIDX-NEXT: v_mov_b32_e32 v11, v2 +; GPRIDX-NEXT: v_mov_b32_e32 v10, v1 +; GPRIDX-NEXT: v_mov_b32_e32 v9, v0 +; GPRIDX-NEXT: v_mov_b32_e32 v9, v7 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc +; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc +; GPRIDX-NEXT: s_cbranch_execnz BB48_1 +; GPRIDX-NEXT: ; %bb.2: +; GPRIDX-NEXT: s_mov_b64 exec, s[0:1] +; GPRIDX-NEXT: v_mov_b32_e32 v0, v9 +; GPRIDX-NEXT: v_mov_b32_e32 v1, v10 +; GPRIDX-NEXT: v_mov_b32_e32 v2, v11 +; GPRIDX-NEXT: v_mov_b32_e32 v3, v12 +; GPRIDX-NEXT: v_mov_b32_e32 v4, v13 +; GPRIDX-NEXT: v_mov_b32_e32 v5, v14 +; GPRIDX-NEXT: v_mov_b32_e32 v6, v15 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_insertelement_v7f32_v_v_v: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, exec_lo +; MOVREL-NEXT: ; implicit-def: $vcc_hi +; MOVREL-NEXT: BB48_1: ; =>This Inner Loop Header: Depth=1 +; MOVREL-NEXT: v_readfirstlane_b32 s1, v8 +; MOVREL-NEXT: v_mov_b32_e32 v16, v7 +; MOVREL-NEXT: v_mov_b32_e32 v9, v0 +; MOVREL-NEXT: v_mov_b32_e32 v15, v6 +; MOVREL-NEXT: v_mov_b32_e32 v14, v5 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v8 +; MOVREL-NEXT: s_mov_b32 m0, s1 +; MOVREL-NEXT: v_mov_b32_e32 v13, v4 +; MOVREL-NEXT: v_mov_b32_e32 v12, v3 +; MOVREL-NEXT: v_mov_b32_e32 v11, v2 +; MOVREL-NEXT: v_mov_b32_e32 v10, v1 +; MOVREL-NEXT: v_movreld_b32_e32 v9, v7 +; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo 
+; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo +; MOVREL-NEXT: s_cbranch_execnz BB48_1 +; MOVREL-NEXT: ; %bb.2: +; MOVREL-NEXT: s_mov_b32 exec_lo, s0 +; MOVREL-NEXT: v_mov_b32_e32 v0, v9 +; MOVREL-NEXT: v_mov_b32_e32 v1, v10 +; MOVREL-NEXT: v_mov_b32_e32 v2, v11 +; MOVREL-NEXT: v_mov_b32_e32 v3, v12 +; MOVREL-NEXT: v_mov_b32_e32 v4, v13 +; MOVREL-NEXT: v_mov_b32_e32 v5, v14 +; MOVREL-NEXT: v_mov_b32_e32 v6, v15 +; MOVREL-NEXT: ; return to shader part epilog +entry: + %insert = insertelement <7 x float> %vec, float %val, i32 %idx + ret <7 x float> %insert +} + +define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_s_s_s(<7 x double> inreg %vec, double inreg %val, i32 inreg %idx) { +; GPRIDX-LABEL: dyn_insertelement_v7f64_s_s_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 s10, s12 +; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_mov_b32 s12, s14 +; GPRIDX-NEXT: s_mov_b32 s13, s15 +; GPRIDX-NEXT: s_mov_b32 m0, s18 +; GPRIDX-NEXT: s_nop 0 +; GPRIDX-NEXT: s_movreld_b64 s[0:1], s[16:17] +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_insertelement_v7f64_s_s_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 m0, s18 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 s10, s12 +; MOVREL-NEXT: s_mov_b32 s11, s13 +; MOVREL-NEXT: s_mov_b32 s12, s14 +; MOVREL-NEXT: s_mov_b32 s13, s15 +; MOVREL-NEXT: 
s_movreld_b64 s[0:1], s[16:17] +; MOVREL-NEXT: ; implicit-def: $vcc_hi +; MOVREL-NEXT: ; return to shader part epilog +entry: + %insert = insertelement <7 x double> %vec, double %val, i32 %idx + ret <7 x double> %insert +} + +define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_s_v_s(<7 x double> inreg %vec, double %val, i32 inreg %idx) { +; GPRIDX-LABEL: dyn_insertelement_v7f64_s_v_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 s10, s12 +; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_mov_b32 s12, s14 +; GPRIDX-NEXT: s_mov_b32 s13, s15 +; GPRIDX-NEXT: v_mov_b32_e32 v17, s15 +; GPRIDX-NEXT: v_mov_b32_e32 v16, s14 +; GPRIDX-NEXT: v_mov_b32_e32 v15, s13 +; GPRIDX-NEXT: v_mov_b32_e32 v14, s12 +; GPRIDX-NEXT: v_mov_b32_e32 v13, s11 +; GPRIDX-NEXT: v_mov_b32_e32 v12, s10 +; GPRIDX-NEXT: v_mov_b32_e32 v11, s9 +; GPRIDX-NEXT: v_mov_b32_e32 v10, s8 +; GPRIDX-NEXT: v_mov_b32_e32 v9, s7 +; GPRIDX-NEXT: v_mov_b32_e32 v8, s6 +; GPRIDX-NEXT: v_mov_b32_e32 v7, s5 +; GPRIDX-NEXT: v_mov_b32_e32 v6, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v5, s3 +; GPRIDX-NEXT: v_mov_b32_e32 v4, s2 +; GPRIDX-NEXT: v_mov_b32_e32 v3, s1 +; GPRIDX-NEXT: v_mov_b32_e32 v2, s0 +; GPRIDX-NEXT: s_lshl_b32 s0, s16, 1 +; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST) +; GPRIDX-NEXT: v_mov_b32_e32 v2, v0 +; GPRIDX-NEXT: v_mov_b32_e32 v3, v1 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: v_readfirstlane_b32 s0, v2 +; GPRIDX-NEXT: v_readfirstlane_b32 s1, v3 +; GPRIDX-NEXT: v_readfirstlane_b32 s2, v4 +; GPRIDX-NEXT: v_readfirstlane_b32 s3, v5 +; GPRIDX-NEXT: v_readfirstlane_b32 s4, v6 +; GPRIDX-NEXT: v_readfirstlane_b32 s5, v7 +; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8 +; 
GPRIDX-NEXT: v_readfirstlane_b32 s7, v9 +; GPRIDX-NEXT: v_readfirstlane_b32 s8, v10 +; GPRIDX-NEXT: v_readfirstlane_b32 s9, v11 +; GPRIDX-NEXT: v_readfirstlane_b32 s10, v12 +; GPRIDX-NEXT: v_readfirstlane_b32 s11, v13 +; GPRIDX-NEXT: v_readfirstlane_b32 s12, v14 +; GPRIDX-NEXT: v_readfirstlane_b32 s13, v15 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_insertelement_v7f64_s_v_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 s10, s12 +; MOVREL-NEXT: s_mov_b32 s11, s13 +; MOVREL-NEXT: s_mov_b32 s12, s14 +; MOVREL-NEXT: s_mov_b32 s13, s15 +; MOVREL-NEXT: v_mov_b32_e32 v17, s15 +; MOVREL-NEXT: v_mov_b32_e32 v2, s0 +; MOVREL-NEXT: s_lshl_b32 m0, s16, 1 +; MOVREL-NEXT: v_mov_b32_e32 v16, s14 +; MOVREL-NEXT: v_mov_b32_e32 v15, s13 +; MOVREL-NEXT: v_mov_b32_e32 v14, s12 +; MOVREL-NEXT: v_mov_b32_e32 v13, s11 +; MOVREL-NEXT: v_mov_b32_e32 v12, s10 +; MOVREL-NEXT: v_mov_b32_e32 v11, s9 +; MOVREL-NEXT: v_mov_b32_e32 v10, s8 +; MOVREL-NEXT: v_mov_b32_e32 v9, s7 +; MOVREL-NEXT: v_mov_b32_e32 v8, s6 +; MOVREL-NEXT: v_mov_b32_e32 v7, s5 +; MOVREL-NEXT: v_mov_b32_e32 v6, s4 +; MOVREL-NEXT: v_mov_b32_e32 v5, s3 +; MOVREL-NEXT: v_mov_b32_e32 v4, s2 +; MOVREL-NEXT: v_mov_b32_e32 v3, s1 +; MOVREL-NEXT: v_movreld_b32_e32 v2, v0 +; MOVREL-NEXT: v_movreld_b32_e32 v3, v1 +; MOVREL-NEXT: v_readfirstlane_b32 s0, v2 +; MOVREL-NEXT: v_readfirstlane_b32 s1, v3 +; MOVREL-NEXT: v_readfirstlane_b32 s2, v4 +; MOVREL-NEXT: v_readfirstlane_b32 s3, v5 +; MOVREL-NEXT: v_readfirstlane_b32 s4, v6 +; MOVREL-NEXT: v_readfirstlane_b32 s5, v7 +; MOVREL-NEXT: v_readfirstlane_b32 s6, v8 +; MOVREL-NEXT: v_readfirstlane_b32 s7, v9 +; MOVREL-NEXT: 
v_readfirstlane_b32 s8, v10 +; MOVREL-NEXT: v_readfirstlane_b32 s9, v11 +; MOVREL-NEXT: v_readfirstlane_b32 s10, v12 +; MOVREL-NEXT: v_readfirstlane_b32 s11, v13 +; MOVREL-NEXT: v_readfirstlane_b32 s12, v14 +; MOVREL-NEXT: v_readfirstlane_b32 s13, v15 +; MOVREL-NEXT: ; implicit-def: $vcc_hi +; MOVREL-NEXT: ; return to shader part epilog +entry: + %insert = insertelement <7 x double> %vec, double %val, i32 %idx + ret <7 x double> %insert +} + +define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_s_v_v(<7 x double> inreg %vec, double %val, i32 %idx) { +; GPRIDX-LABEL: dyn_insertelement_v7f64_s_v_v: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 s10, s12 +; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_mov_b32 s12, s14 +; GPRIDX-NEXT: s_mov_b32 s13, s15 +; GPRIDX-NEXT: v_mov_b32_e32 v34, s15 +; GPRIDX-NEXT: v_mov_b32_e32 v33, s14 +; GPRIDX-NEXT: v_mov_b32_e32 v32, s13 +; GPRIDX-NEXT: v_mov_b32_e32 v31, s12 +; GPRIDX-NEXT: v_mov_b32_e32 v30, s11 +; GPRIDX-NEXT: v_mov_b32_e32 v29, s10 +; GPRIDX-NEXT: v_mov_b32_e32 v28, s9 +; GPRIDX-NEXT: v_mov_b32_e32 v27, s8 +; GPRIDX-NEXT: v_mov_b32_e32 v26, s7 +; GPRIDX-NEXT: v_mov_b32_e32 v25, s6 +; GPRIDX-NEXT: v_mov_b32_e32 v24, s5 +; GPRIDX-NEXT: v_mov_b32_e32 v23, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v22, s3 +; GPRIDX-NEXT: v_mov_b32_e32 v21, s2 +; GPRIDX-NEXT: v_mov_b32_e32 v20, s1 +; GPRIDX-NEXT: v_mov_b32_e32 v19, s0 +; GPRIDX-NEXT: s_mov_b64 s[0:1], exec +; GPRIDX-NEXT: BB51_1: ; =>This Inner Loop Header: Depth=1 +; GPRIDX-NEXT: v_readfirstlane_b32 s2, v2 +; GPRIDX-NEXT: s_lshl_b32 s3, s2, 1 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v2 +; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST) +; 
GPRIDX-NEXT: v_mov_b32_e32 v3, v19 +; GPRIDX-NEXT: v_mov_b32_e32 v4, v20 +; GPRIDX-NEXT: v_mov_b32_e32 v5, v21 +; GPRIDX-NEXT: v_mov_b32_e32 v6, v22 +; GPRIDX-NEXT: v_mov_b32_e32 v7, v23 +; GPRIDX-NEXT: v_mov_b32_e32 v8, v24 +; GPRIDX-NEXT: v_mov_b32_e32 v9, v25 +; GPRIDX-NEXT: v_mov_b32_e32 v10, v26 +; GPRIDX-NEXT: v_mov_b32_e32 v11, v27 +; GPRIDX-NEXT: v_mov_b32_e32 v12, v28 +; GPRIDX-NEXT: v_mov_b32_e32 v13, v29 +; GPRIDX-NEXT: v_mov_b32_e32 v14, v30 +; GPRIDX-NEXT: v_mov_b32_e32 v15, v31 +; GPRIDX-NEXT: v_mov_b32_e32 v16, v32 +; GPRIDX-NEXT: v_mov_b32_e32 v17, v33 +; GPRIDX-NEXT: v_mov_b32_e32 v18, v34 +; GPRIDX-NEXT: v_mov_b32_e32 v3, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST) +; GPRIDX-NEXT: v_mov_b32_e32 v4, v1 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc +; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc +; GPRIDX-NEXT: s_cbranch_execnz BB51_1 +; GPRIDX-NEXT: ; %bb.2: +; GPRIDX-NEXT: s_mov_b64 exec, s[0:1] +; GPRIDX-NEXT: v_readfirstlane_b32 s0, v3 +; GPRIDX-NEXT: v_readfirstlane_b32 s1, v4 +; GPRIDX-NEXT: v_readfirstlane_b32 s2, v5 +; GPRIDX-NEXT: v_readfirstlane_b32 s3, v6 +; GPRIDX-NEXT: v_readfirstlane_b32 s4, v7 +; GPRIDX-NEXT: v_readfirstlane_b32 s5, v8 +; GPRIDX-NEXT: v_readfirstlane_b32 s6, v9 +; GPRIDX-NEXT: v_readfirstlane_b32 s7, v10 +; GPRIDX-NEXT: v_readfirstlane_b32 s8, v11 +; GPRIDX-NEXT: v_readfirstlane_b32 s9, v12 +; GPRIDX-NEXT: v_readfirstlane_b32 s10, v13 +; GPRIDX-NEXT: v_readfirstlane_b32 s11, v14 +; GPRIDX-NEXT: v_readfirstlane_b32 s12, v15 +; GPRIDX-NEXT: v_readfirstlane_b32 s13, v16 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_insertelement_v7f64_s_v_v: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 
+; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 s10, s12 +; MOVREL-NEXT: s_mov_b32 s11, s13 +; MOVREL-NEXT: s_mov_b32 s12, s14 +; MOVREL-NEXT: s_mov_b32 s13, s15 +; MOVREL-NEXT: v_mov_b32_e32 v34, s15 +; MOVREL-NEXT: v_mov_b32_e32 v33, s14 +; MOVREL-NEXT: v_mov_b32_e32 v30, s11 +; MOVREL-NEXT: v_mov_b32_e32 v31, s12 +; MOVREL-NEXT: v_mov_b32_e32 v32, s13 +; MOVREL-NEXT: v_mov_b32_e32 v29, s10 +; MOVREL-NEXT: v_mov_b32_e32 v28, s9 +; MOVREL-NEXT: v_mov_b32_e32 v27, s8 +; MOVREL-NEXT: v_mov_b32_e32 v26, s7 +; MOVREL-NEXT: v_mov_b32_e32 v25, s6 +; MOVREL-NEXT: v_mov_b32_e32 v24, s5 +; MOVREL-NEXT: v_mov_b32_e32 v23, s4 +; MOVREL-NEXT: v_mov_b32_e32 v22, s3 +; MOVREL-NEXT: v_mov_b32_e32 v21, s2 +; MOVREL-NEXT: v_mov_b32_e32 v20, s1 +; MOVREL-NEXT: v_mov_b32_e32 v19, s0 +; MOVREL-NEXT: s_mov_b32 s0, exec_lo +; MOVREL-NEXT: ; implicit-def: $vcc_hi +; MOVREL-NEXT: BB51_1: ; =>This Inner Loop Header: Depth=1 +; MOVREL-NEXT: v_readfirstlane_b32 s1, v2 +; MOVREL-NEXT: v_mov_b32_e32 v3, v19 +; MOVREL-NEXT: v_mov_b32_e32 v4, v20 +; MOVREL-NEXT: v_mov_b32_e32 v5, v21 +; MOVREL-NEXT: v_mov_b32_e32 v6, v22 +; MOVREL-NEXT: s_lshl_b32 m0, s1, 1 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v2 +; MOVREL-NEXT: v_mov_b32_e32 v7, v23 +; MOVREL-NEXT: v_mov_b32_e32 v8, v24 +; MOVREL-NEXT: v_mov_b32_e32 v9, v25 +; MOVREL-NEXT: v_mov_b32_e32 v10, v26 +; MOVREL-NEXT: v_mov_b32_e32 v11, v27 +; MOVREL-NEXT: v_mov_b32_e32 v12, v28 +; MOVREL-NEXT: v_mov_b32_e32 v13, v29 +; MOVREL-NEXT: v_mov_b32_e32 v14, v30 +; MOVREL-NEXT: v_mov_b32_e32 v15, v31 +; MOVREL-NEXT: v_mov_b32_e32 v16, v32 +; MOVREL-NEXT: v_mov_b32_e32 v17, v33 +; MOVREL-NEXT: v_mov_b32_e32 v18, v34 +; MOVREL-NEXT: v_movreld_b32_e32 v3, v0 +; MOVREL-NEXT: v_movreld_b32_e32 v4, v1 +; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo +; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo +; MOVREL-NEXT: s_cbranch_execnz BB51_1 +; MOVREL-NEXT: ; %bb.2: +; MOVREL-NEXT: s_mov_b32 exec_lo, s0 +; 
MOVREL-NEXT: v_readfirstlane_b32 s0, v3 +; MOVREL-NEXT: v_readfirstlane_b32 s1, v4 +; MOVREL-NEXT: v_readfirstlane_b32 s2, v5 +; MOVREL-NEXT: v_readfirstlane_b32 s3, v6 +; MOVREL-NEXT: v_readfirstlane_b32 s4, v7 +; MOVREL-NEXT: v_readfirstlane_b32 s5, v8 +; MOVREL-NEXT: v_readfirstlane_b32 s6, v9 +; MOVREL-NEXT: v_readfirstlane_b32 s7, v10 +; MOVREL-NEXT: v_readfirstlane_b32 s8, v11 +; MOVREL-NEXT: v_readfirstlane_b32 s9, v12 +; MOVREL-NEXT: v_readfirstlane_b32 s10, v13 +; MOVREL-NEXT: v_readfirstlane_b32 s11, v14 +; MOVREL-NEXT: v_readfirstlane_b32 s12, v15 +; MOVREL-NEXT: v_readfirstlane_b32 s13, v16 +; MOVREL-NEXT: ; return to shader part epilog +entry: + %insert = insertelement <7 x double> %vec, double %val, i32 %idx + ret <7 x double> %insert +} + +define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_v_v_s(<7 x double> %vec, double %val, i32 inreg %idx) { +; GPRIDX-LABEL: dyn_insertelement_v7f64_v_v_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 +; GPRIDX-NEXT: v_mov_b32_e32 v16, v15 +; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST) +; GPRIDX-NEXT: v_mov_b32_e32 v0, v14 +; GPRIDX-NEXT: v_mov_b32_e32 v1, v16 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: v_readfirstlane_b32 s0, v0 +; GPRIDX-NEXT: v_readfirstlane_b32 s1, v1 +; GPRIDX-NEXT: v_readfirstlane_b32 s2, v2 +; GPRIDX-NEXT: v_readfirstlane_b32 s3, v3 +; GPRIDX-NEXT: v_readfirstlane_b32 s4, v4 +; GPRIDX-NEXT: v_readfirstlane_b32 s5, v5 +; GPRIDX-NEXT: v_readfirstlane_b32 s6, v6 +; GPRIDX-NEXT: v_readfirstlane_b32 s7, v7 +; GPRIDX-NEXT: v_readfirstlane_b32 s8, v8 +; GPRIDX-NEXT: v_readfirstlane_b32 s9, v9 +; GPRIDX-NEXT: v_readfirstlane_b32 s10, v10 +; GPRIDX-NEXT: v_readfirstlane_b32 s11, v11 +; GPRIDX-NEXT: v_readfirstlane_b32 s12, v12 +; GPRIDX-NEXT: v_readfirstlane_b32 s13, v13 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_insertelement_v7f64_v_v_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_lshl_b32 m0, s2, 1 +; MOVREL-NEXT: v_mov_b32_e32 v16, 
v15 +; MOVREL-NEXT: ; implicit-def: $vcc_hi +; MOVREL-NEXT: v_movreld_b32_e32 v0, v14 +; MOVREL-NEXT: v_movreld_b32_e32 v1, v16 +; MOVREL-NEXT: v_readfirstlane_b32 s0, v0 +; MOVREL-NEXT: v_readfirstlane_b32 s1, v1 +; MOVREL-NEXT: v_readfirstlane_b32 s2, v2 +; MOVREL-NEXT: v_readfirstlane_b32 s3, v3 +; MOVREL-NEXT: v_readfirstlane_b32 s4, v4 +; MOVREL-NEXT: v_readfirstlane_b32 s5, v5 +; MOVREL-NEXT: v_readfirstlane_b32 s6, v6 +; MOVREL-NEXT: v_readfirstlane_b32 s7, v7 +; MOVREL-NEXT: v_readfirstlane_b32 s8, v8 +; MOVREL-NEXT: v_readfirstlane_b32 s9, v9 +; MOVREL-NEXT: v_readfirstlane_b32 s10, v10 +; MOVREL-NEXT: v_readfirstlane_b32 s11, v11 +; MOVREL-NEXT: v_readfirstlane_b32 s12, v12 +; MOVREL-NEXT: v_readfirstlane_b32 s13, v13 +; MOVREL-NEXT: ; return to shader part epilog +entry: + %insert = insertelement <7 x double> %vec, double %val, i32 %idx + ret <7 x double> %insert +} + +define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_v_v_v(<7 x double> %vec, double %val, i32 %idx) { +; GPRIDX-LABEL: dyn_insertelement_v7f64_v_v_v: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b64 s[0:1], exec +; GPRIDX-NEXT: BB53_1: ; =>This Inner Loop Header: Depth=1 +; GPRIDX-NEXT: v_readfirstlane_b32 s2, v16 +; GPRIDX-NEXT: s_lshl_b32 s3, s2, 1 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v16 +; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST) +; GPRIDX-NEXT: v_mov_b32_e32 v32, v15 +; GPRIDX-NEXT: v_mov_b32_e32 v31, v14 +; GPRIDX-NEXT: v_mov_b32_e32 v30, v13 +; GPRIDX-NEXT: v_mov_b32_e32 v29, v12 +; GPRIDX-NEXT: v_mov_b32_e32 v28, v11 +; GPRIDX-NEXT: v_mov_b32_e32 v27, v10 +; GPRIDX-NEXT: v_mov_b32_e32 v26, v9 +; GPRIDX-NEXT: v_mov_b32_e32 v25, v8 +; GPRIDX-NEXT: v_mov_b32_e32 v24, v7 +; GPRIDX-NEXT: v_mov_b32_e32 v23, v6 +; GPRIDX-NEXT: v_mov_b32_e32 v22, v5 +; GPRIDX-NEXT: v_mov_b32_e32 v21, v4 +; GPRIDX-NEXT: v_mov_b32_e32 v20, v3 +; GPRIDX-NEXT: v_mov_b32_e32 v19, v2 +; GPRIDX-NEXT: v_mov_b32_e32 v18, v1 +; GPRIDX-NEXT: v_mov_b32_e32 v17, v0 +; GPRIDX-NEXT: v_mov_b32_e32 v17, 
v14 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST) +; GPRIDX-NEXT: v_mov_b32_e32 v18, v15 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc +; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc +; GPRIDX-NEXT: s_cbranch_execnz BB53_1 +; GPRIDX-NEXT: ; %bb.2: +; GPRIDX-NEXT: s_mov_b64 exec, s[0:1] +; GPRIDX-NEXT: v_readfirstlane_b32 s0, v17 +; GPRIDX-NEXT: v_readfirstlane_b32 s1, v18 +; GPRIDX-NEXT: v_readfirstlane_b32 s2, v19 +; GPRIDX-NEXT: v_readfirstlane_b32 s3, v20 +; GPRIDX-NEXT: v_readfirstlane_b32 s4, v21 +; GPRIDX-NEXT: v_readfirstlane_b32 s5, v22 +; GPRIDX-NEXT: v_readfirstlane_b32 s6, v23 +; GPRIDX-NEXT: v_readfirstlane_b32 s7, v24 +; GPRIDX-NEXT: v_readfirstlane_b32 s8, v25 +; GPRIDX-NEXT: v_readfirstlane_b32 s9, v26 +; GPRIDX-NEXT: v_readfirstlane_b32 s10, v27 +; GPRIDX-NEXT: v_readfirstlane_b32 s11, v28 +; GPRIDX-NEXT: v_readfirstlane_b32 s12, v29 +; GPRIDX-NEXT: v_readfirstlane_b32 s13, v30 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_insertelement_v7f64_v_v_v: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, exec_lo +; MOVREL-NEXT: ; implicit-def: $vcc_hi +; MOVREL-NEXT: BB53_1: ; =>This Inner Loop Header: Depth=1 +; MOVREL-NEXT: v_readfirstlane_b32 s1, v16 +; MOVREL-NEXT: v_mov_b32_e32 v32, v15 +; MOVREL-NEXT: v_mov_b32_e32 v17, v0 +; MOVREL-NEXT: v_mov_b32_e32 v31, v14 +; MOVREL-NEXT: v_mov_b32_e32 v30, v13 +; MOVREL-NEXT: s_lshl_b32 m0, s1, 1 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v16 +; MOVREL-NEXT: v_mov_b32_e32 v29, v12 +; MOVREL-NEXT: v_mov_b32_e32 v28, v11 +; MOVREL-NEXT: v_mov_b32_e32 v27, v10 +; MOVREL-NEXT: v_mov_b32_e32 v26, v9 +; MOVREL-NEXT: v_mov_b32_e32 v25, v8 +; MOVREL-NEXT: v_mov_b32_e32 v24, v7 +; MOVREL-NEXT: v_mov_b32_e32 v23, v6 +; MOVREL-NEXT: v_mov_b32_e32 v22, v5 +; MOVREL-NEXT: v_mov_b32_e32 v21, v4 +; MOVREL-NEXT: v_mov_b32_e32 v20, v3 +; MOVREL-NEXT: v_mov_b32_e32 v19, v2 +; MOVREL-NEXT: v_mov_b32_e32 v18, v1 +; MOVREL-NEXT: 
v_movreld_b32_e32 v17, v14 +; MOVREL-NEXT: v_movreld_b32_e32 v18, v15 +; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo +; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo +; MOVREL-NEXT: s_cbranch_execnz BB53_1 +; MOVREL-NEXT: ; %bb.2: +; MOVREL-NEXT: s_mov_b32 exec_lo, s0 +; MOVREL-NEXT: v_readfirstlane_b32 s0, v17 +; MOVREL-NEXT: v_readfirstlane_b32 s1, v18 +; MOVREL-NEXT: v_readfirstlane_b32 s2, v19 +; MOVREL-NEXT: v_readfirstlane_b32 s3, v20 +; MOVREL-NEXT: v_readfirstlane_b32 s4, v21 +; MOVREL-NEXT: v_readfirstlane_b32 s5, v22 +; MOVREL-NEXT: v_readfirstlane_b32 s6, v23 +; MOVREL-NEXT: v_readfirstlane_b32 s7, v24 +; MOVREL-NEXT: v_readfirstlane_b32 s8, v25 +; MOVREL-NEXT: v_readfirstlane_b32 s9, v26 +; MOVREL-NEXT: v_readfirstlane_b32 s10, v27 +; MOVREL-NEXT: v_readfirstlane_b32 s11, v28 +; MOVREL-NEXT: v_readfirstlane_b32 s12, v29 +; MOVREL-NEXT: v_readfirstlane_b32 s13, v30 +; MOVREL-NEXT: ; return to shader part epilog +entry: + %insert = insertelement <7 x double> %vec, double %val, i32 %idx + ret <7 x double> %insert +} + +define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_s_s_s(<5 x double> inreg %vec, double inreg %val, i32 inreg %idx) { +; GPRIDX-LABEL: dyn_insertelement_v5f64_s_s_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 m0, s14 +; GPRIDX-NEXT: s_nop 0 +; GPRIDX-NEXT: s_movreld_b64 s[0:1], s[12:13] +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_insertelement_v5f64_s_s_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 m0, s14 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: 
s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_movreld_b64 s[0:1], s[12:13] +; MOVREL-NEXT: ; implicit-def: $vcc_hi +; MOVREL-NEXT: ; return to shader part epilog +entry: + %insert = insertelement <5 x double> %vec, double %val, i32 %idx + ret <5 x double> %insert +} + +define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_s_v_s(<5 x double> inreg %vec, double %val, i32 inreg %idx) { +; GPRIDX-LABEL: dyn_insertelement_v5f64_s_v_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: v_mov_b32_e32 v17, s15 +; GPRIDX-NEXT: v_mov_b32_e32 v16, s14 +; GPRIDX-NEXT: v_mov_b32_e32 v15, s13 +; GPRIDX-NEXT: v_mov_b32_e32 v14, s12 +; GPRIDX-NEXT: v_mov_b32_e32 v13, s11 +; GPRIDX-NEXT: v_mov_b32_e32 v12, s10 +; GPRIDX-NEXT: v_mov_b32_e32 v11, s9 +; GPRIDX-NEXT: v_mov_b32_e32 v10, s8 +; GPRIDX-NEXT: v_mov_b32_e32 v9, s7 +; GPRIDX-NEXT: v_mov_b32_e32 v8, s6 +; GPRIDX-NEXT: v_mov_b32_e32 v7, s5 +; GPRIDX-NEXT: v_mov_b32_e32 v6, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v5, s3 +; GPRIDX-NEXT: v_mov_b32_e32 v4, s2 +; GPRIDX-NEXT: v_mov_b32_e32 v3, s1 +; GPRIDX-NEXT: v_mov_b32_e32 v2, s0 +; GPRIDX-NEXT: s_lshl_b32 s0, s12, 1 +; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(DST) +; GPRIDX-NEXT: v_mov_b32_e32 v2, v0 +; GPRIDX-NEXT: v_mov_b32_e32 v3, v1 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: v_readfirstlane_b32 s0, v2 +; GPRIDX-NEXT: v_readfirstlane_b32 s1, v3 +; GPRIDX-NEXT: v_readfirstlane_b32 s2, v4 +; GPRIDX-NEXT: v_readfirstlane_b32 s3, v5 +; GPRIDX-NEXT: v_readfirstlane_b32 s4, v6 +; GPRIDX-NEXT: 
v_readfirstlane_b32 s5, v7 +; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8 +; GPRIDX-NEXT: v_readfirstlane_b32 s7, v9 +; GPRIDX-NEXT: v_readfirstlane_b32 s8, v10 +; GPRIDX-NEXT: v_readfirstlane_b32 s9, v11 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_insertelement_v5f64_s_v_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: v_mov_b32_e32 v17, s15 +; MOVREL-NEXT: v_mov_b32_e32 v2, s0 +; MOVREL-NEXT: s_lshl_b32 m0, s12, 1 +; MOVREL-NEXT: v_mov_b32_e32 v16, s14 +; MOVREL-NEXT: v_mov_b32_e32 v15, s13 +; MOVREL-NEXT: v_mov_b32_e32 v14, s12 +; MOVREL-NEXT: v_mov_b32_e32 v13, s11 +; MOVREL-NEXT: v_mov_b32_e32 v12, s10 +; MOVREL-NEXT: v_mov_b32_e32 v11, s9 +; MOVREL-NEXT: v_mov_b32_e32 v10, s8 +; MOVREL-NEXT: v_mov_b32_e32 v9, s7 +; MOVREL-NEXT: v_mov_b32_e32 v8, s6 +; MOVREL-NEXT: v_mov_b32_e32 v7, s5 +; MOVREL-NEXT: v_mov_b32_e32 v6, s4 +; MOVREL-NEXT: v_mov_b32_e32 v5, s3 +; MOVREL-NEXT: v_mov_b32_e32 v4, s2 +; MOVREL-NEXT: v_mov_b32_e32 v3, s1 +; MOVREL-NEXT: v_movreld_b32_e32 v2, v0 +; MOVREL-NEXT: v_movreld_b32_e32 v3, v1 +; MOVREL-NEXT: v_readfirstlane_b32 s0, v2 +; MOVREL-NEXT: v_readfirstlane_b32 s1, v3 +; MOVREL-NEXT: v_readfirstlane_b32 s2, v4 +; MOVREL-NEXT: v_readfirstlane_b32 s3, v5 +; MOVREL-NEXT: v_readfirstlane_b32 s4, v6 +; MOVREL-NEXT: v_readfirstlane_b32 s5, v7 +; MOVREL-NEXT: v_readfirstlane_b32 s6, v8 +; MOVREL-NEXT: v_readfirstlane_b32 s7, v9 +; MOVREL-NEXT: v_readfirstlane_b32 s8, v10 +; MOVREL-NEXT: v_readfirstlane_b32 s9, v11 +; MOVREL-NEXT: ; implicit-def: $vcc_hi +; MOVREL-NEXT: ; return to shader part epilog +entry: + %insert = insertelement <5 x double> %vec, double %val, i32 %idx + ret <5 x double> 
%insert +} + +define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_s_v_v(<5 x double> inreg %vec, double %val, i32 %idx) { +; GPRIDX-LABEL: dyn_insertelement_v5f64_s_v_v: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: v_mov_b32_e32 v34, s15 +; GPRIDX-NEXT: v_mov_b32_e32 v33, s14 +; GPRIDX-NEXT: v_mov_b32_e32 v32, s13 +; GPRIDX-NEXT: v_mov_b32_e32 v31, s12 +; GPRIDX-NEXT: v_mov_b32_e32 v30, s11 +; GPRIDX-NEXT: v_mov_b32_e32 v29, s10 +; GPRIDX-NEXT: v_mov_b32_e32 v28, s9 +; GPRIDX-NEXT: v_mov_b32_e32 v27, s8 +; GPRIDX-NEXT: v_mov_b32_e32 v26, s7 +; GPRIDX-NEXT: v_mov_b32_e32 v25, s6 +; GPRIDX-NEXT: v_mov_b32_e32 v24, s5 +; GPRIDX-NEXT: v_mov_b32_e32 v23, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v22, s3 +; GPRIDX-NEXT: v_mov_b32_e32 v21, s2 +; GPRIDX-NEXT: v_mov_b32_e32 v20, s1 +; GPRIDX-NEXT: v_mov_b32_e32 v19, s0 +; GPRIDX-NEXT: s_mov_b64 s[0:1], exec +; GPRIDX-NEXT: BB56_1: ; =>This Inner Loop Header: Depth=1 +; GPRIDX-NEXT: v_readfirstlane_b32 s2, v2 +; GPRIDX-NEXT: s_lshl_b32 s3, s2, 1 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v2 +; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST) +; GPRIDX-NEXT: v_mov_b32_e32 v3, v19 +; GPRIDX-NEXT: v_mov_b32_e32 v4, v20 +; GPRIDX-NEXT: v_mov_b32_e32 v5, v21 +; GPRIDX-NEXT: v_mov_b32_e32 v6, v22 +; GPRIDX-NEXT: v_mov_b32_e32 v7, v23 +; GPRIDX-NEXT: v_mov_b32_e32 v8, v24 +; GPRIDX-NEXT: v_mov_b32_e32 v9, v25 +; GPRIDX-NEXT: v_mov_b32_e32 v10, v26 +; GPRIDX-NEXT: v_mov_b32_e32 v11, v27 +; GPRIDX-NEXT: v_mov_b32_e32 v12, v28 +; GPRIDX-NEXT: v_mov_b32_e32 v13, v29 +; GPRIDX-NEXT: v_mov_b32_e32 v14, v30 +; GPRIDX-NEXT: v_mov_b32_e32 v15, v31 +; GPRIDX-NEXT: v_mov_b32_e32 v16, v32 +; GPRIDX-NEXT: v_mov_b32_e32 v17, v33 
+; GPRIDX-NEXT: v_mov_b32_e32 v18, v34 +; GPRIDX-NEXT: v_mov_b32_e32 v3, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST) +; GPRIDX-NEXT: v_mov_b32_e32 v4, v1 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc +; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc +; GPRIDX-NEXT: s_cbranch_execnz BB56_1 +; GPRIDX-NEXT: ; %bb.2: +; GPRIDX-NEXT: s_mov_b64 exec, s[0:1] +; GPRIDX-NEXT: v_readfirstlane_b32 s0, v3 +; GPRIDX-NEXT: v_readfirstlane_b32 s1, v4 +; GPRIDX-NEXT: v_readfirstlane_b32 s2, v5 +; GPRIDX-NEXT: v_readfirstlane_b32 s3, v6 +; GPRIDX-NEXT: v_readfirstlane_b32 s4, v7 +; GPRIDX-NEXT: v_readfirstlane_b32 s5, v8 +; GPRIDX-NEXT: v_readfirstlane_b32 s6, v9 +; GPRIDX-NEXT: v_readfirstlane_b32 s7, v10 +; GPRIDX-NEXT: v_readfirstlane_b32 s8, v11 +; GPRIDX-NEXT: v_readfirstlane_b32 s9, v12 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_insertelement_v5f64_s_v_v: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: v_mov_b32_e32 v34, s15 +; MOVREL-NEXT: v_mov_b32_e32 v33, s14 +; MOVREL-NEXT: v_mov_b32_e32 v32, s13 +; MOVREL-NEXT: v_mov_b32_e32 v31, s12 +; MOVREL-NEXT: v_mov_b32_e32 v30, s11 +; MOVREL-NEXT: v_mov_b32_e32 v29, s10 +; MOVREL-NEXT: v_mov_b32_e32 v28, s9 +; MOVREL-NEXT: v_mov_b32_e32 v27, s8 +; MOVREL-NEXT: v_mov_b32_e32 v26, s7 +; MOVREL-NEXT: v_mov_b32_e32 v25, s6 +; MOVREL-NEXT: v_mov_b32_e32 v24, s5 +; MOVREL-NEXT: v_mov_b32_e32 v23, s4 +; MOVREL-NEXT: v_mov_b32_e32 v22, s3 +; MOVREL-NEXT: v_mov_b32_e32 v21, s2 +; MOVREL-NEXT: v_mov_b32_e32 v20, s1 +; MOVREL-NEXT: v_mov_b32_e32 v19, s0 +; MOVREL-NEXT: s_mov_b32 s0, exec_lo +; MOVREL-NEXT: ; implicit-def: 
$vcc_hi +; MOVREL-NEXT: BB56_1: ; =>This Inner Loop Header: Depth=1 +; MOVREL-NEXT: v_readfirstlane_b32 s1, v2 +; MOVREL-NEXT: v_mov_b32_e32 v3, v19 +; MOVREL-NEXT: v_mov_b32_e32 v4, v20 +; MOVREL-NEXT: v_mov_b32_e32 v5, v21 +; MOVREL-NEXT: v_mov_b32_e32 v6, v22 +; MOVREL-NEXT: s_lshl_b32 m0, s1, 1 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v2 +; MOVREL-NEXT: v_mov_b32_e32 v7, v23 +; MOVREL-NEXT: v_mov_b32_e32 v8, v24 +; MOVREL-NEXT: v_mov_b32_e32 v9, v25 +; MOVREL-NEXT: v_mov_b32_e32 v10, v26 +; MOVREL-NEXT: v_mov_b32_e32 v11, v27 +; MOVREL-NEXT: v_mov_b32_e32 v12, v28 +; MOVREL-NEXT: v_mov_b32_e32 v13, v29 +; MOVREL-NEXT: v_mov_b32_e32 v14, v30 +; MOVREL-NEXT: v_mov_b32_e32 v15, v31 +; MOVREL-NEXT: v_mov_b32_e32 v16, v32 +; MOVREL-NEXT: v_mov_b32_e32 v17, v33 +; MOVREL-NEXT: v_mov_b32_e32 v18, v34 +; MOVREL-NEXT: v_movreld_b32_e32 v3, v0 +; MOVREL-NEXT: v_movreld_b32_e32 v4, v1 +; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo +; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo +; MOVREL-NEXT: s_cbranch_execnz BB56_1 +; MOVREL-NEXT: ; %bb.2: +; MOVREL-NEXT: s_mov_b32 exec_lo, s0 +; MOVREL-NEXT: v_readfirstlane_b32 s0, v3 +; MOVREL-NEXT: v_readfirstlane_b32 s1, v4 +; MOVREL-NEXT: v_readfirstlane_b32 s2, v5 +; MOVREL-NEXT: v_readfirstlane_b32 s3, v6 +; MOVREL-NEXT: v_readfirstlane_b32 s4, v7 +; MOVREL-NEXT: v_readfirstlane_b32 s5, v8 +; MOVREL-NEXT: v_readfirstlane_b32 s6, v9 +; MOVREL-NEXT: v_readfirstlane_b32 s7, v10 +; MOVREL-NEXT: v_readfirstlane_b32 s8, v11 +; MOVREL-NEXT: v_readfirstlane_b32 s9, v12 +; MOVREL-NEXT: ; return to shader part epilog +entry: + %insert = insertelement <5 x double> %vec, double %val, i32 %idx + ret <5 x double> %insert +} + +define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_v_v_s(<5 x double> %vec, double %val, i32 inreg %idx) { +; GPRIDX-LABEL: dyn_insertelement_v5f64_v_v_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 +; GPRIDX-NEXT: v_mov_b32_e32 v16, v11 +; GPRIDX-NEXT: s_set_gpr_idx_on s0, 
gpr_idx(DST) +; GPRIDX-NEXT: v_mov_b32_e32 v0, v10 +; GPRIDX-NEXT: v_mov_b32_e32 v1, v16 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: v_readfirstlane_b32 s0, v0 +; GPRIDX-NEXT: v_readfirstlane_b32 s1, v1 +; GPRIDX-NEXT: v_readfirstlane_b32 s2, v2 +; GPRIDX-NEXT: v_readfirstlane_b32 s3, v3 +; GPRIDX-NEXT: v_readfirstlane_b32 s4, v4 +; GPRIDX-NEXT: v_readfirstlane_b32 s5, v5 +; GPRIDX-NEXT: v_readfirstlane_b32 s6, v6 +; GPRIDX-NEXT: v_readfirstlane_b32 s7, v7 +; GPRIDX-NEXT: v_readfirstlane_b32 s8, v8 +; GPRIDX-NEXT: v_readfirstlane_b32 s9, v9 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_insertelement_v5f64_v_v_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_lshl_b32 m0, s2, 1 +; MOVREL-NEXT: v_mov_b32_e32 v16, v11 +; MOVREL-NEXT: ; implicit-def: $vcc_hi +; MOVREL-NEXT: v_movreld_b32_e32 v0, v10 +; MOVREL-NEXT: v_movreld_b32_e32 v1, v16 +; MOVREL-NEXT: v_readfirstlane_b32 s0, v0 +; MOVREL-NEXT: v_readfirstlane_b32 s1, v1 +; MOVREL-NEXT: v_readfirstlane_b32 s2, v2 +; MOVREL-NEXT: v_readfirstlane_b32 s3, v3 +; MOVREL-NEXT: v_readfirstlane_b32 s4, v4 +; MOVREL-NEXT: v_readfirstlane_b32 s5, v5 +; MOVREL-NEXT: v_readfirstlane_b32 s6, v6 +; MOVREL-NEXT: v_readfirstlane_b32 s7, v7 +; MOVREL-NEXT: v_readfirstlane_b32 s8, v8 +; MOVREL-NEXT: v_readfirstlane_b32 s9, v9 +; MOVREL-NEXT: ; return to shader part epilog +entry: + %insert = insertelement <5 x double> %vec, double %val, i32 %idx + ret <5 x double> %insert +} + +define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_v_v_v(<5 x double> %vec, double %val, i32 %idx) { +; GPRIDX-LABEL: dyn_insertelement_v5f64_v_v_v: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b64 s[0:1], exec +; GPRIDX-NEXT: BB58_1: ; =>This Inner Loop Header: Depth=1 +; GPRIDX-NEXT: v_readfirstlane_b32 s2, v12 +; GPRIDX-NEXT: s_lshl_b32 s3, s2, 1 +; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s2, v12 +; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST) +; GPRIDX-NEXT: v_mov_b32_e32 v28, v15 +; GPRIDX-NEXT: v_mov_b32_e32 v27, 
v14 +; GPRIDX-NEXT: v_mov_b32_e32 v26, v13 +; GPRIDX-NEXT: v_mov_b32_e32 v25, v12 +; GPRIDX-NEXT: v_mov_b32_e32 v24, v11 +; GPRIDX-NEXT: v_mov_b32_e32 v23, v10 +; GPRIDX-NEXT: v_mov_b32_e32 v22, v9 +; GPRIDX-NEXT: v_mov_b32_e32 v21, v8 +; GPRIDX-NEXT: v_mov_b32_e32 v20, v7 +; GPRIDX-NEXT: v_mov_b32_e32 v19, v6 +; GPRIDX-NEXT: v_mov_b32_e32 v18, v5 +; GPRIDX-NEXT: v_mov_b32_e32 v17, v4 +; GPRIDX-NEXT: v_mov_b32_e32 v16, v3 +; GPRIDX-NEXT: v_mov_b32_e32 v15, v2 +; GPRIDX-NEXT: v_mov_b32_e32 v14, v1 +; GPRIDX-NEXT: v_mov_b32_e32 v13, v0 +; GPRIDX-NEXT: v_mov_b32_e32 v13, v10 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_set_gpr_idx_on s3, gpr_idx(DST) +; GPRIDX-NEXT: v_mov_b32_e32 v14, v11 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc +; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc +; GPRIDX-NEXT: s_cbranch_execnz BB58_1 +; GPRIDX-NEXT: ; %bb.2: +; GPRIDX-NEXT: s_mov_b64 exec, s[0:1] +; GPRIDX-NEXT: v_readfirstlane_b32 s0, v13 +; GPRIDX-NEXT: v_readfirstlane_b32 s1, v14 +; GPRIDX-NEXT: v_readfirstlane_b32 s2, v15 +; GPRIDX-NEXT: v_readfirstlane_b32 s3, v16 +; GPRIDX-NEXT: v_readfirstlane_b32 s4, v17 +; GPRIDX-NEXT: v_readfirstlane_b32 s5, v18 +; GPRIDX-NEXT: v_readfirstlane_b32 s6, v19 +; GPRIDX-NEXT: v_readfirstlane_b32 s7, v20 +; GPRIDX-NEXT: v_readfirstlane_b32 s8, v21 +; GPRIDX-NEXT: v_readfirstlane_b32 s9, v22 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_insertelement_v5f64_v_v_v: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, exec_lo +; MOVREL-NEXT: ; implicit-def: $vcc_hi +; MOVREL-NEXT: BB58_1: ; =>This Inner Loop Header: Depth=1 +; MOVREL-NEXT: v_readfirstlane_b32 s1, v12 +; MOVREL-NEXT: v_mov_b32_e32 v28, v15 +; MOVREL-NEXT: v_mov_b32_e32 v27, v14 +; MOVREL-NEXT: v_mov_b32_e32 v26, v13 +; MOVREL-NEXT: v_mov_b32_e32 v25, v12 +; MOVREL-NEXT: v_mov_b32_e32 v24, v11 +; MOVREL-NEXT: v_mov_b32_e32 v23, v10 +; MOVREL-NEXT: v_mov_b32_e32 v22, v9 +; MOVREL-NEXT: v_mov_b32_e32 v21, v8 +; 
MOVREL-NEXT: v_mov_b32_e32 v20, v7 +; MOVREL-NEXT: v_mov_b32_e32 v19, v6 +; MOVREL-NEXT: v_mov_b32_e32 v18, v5 +; MOVREL-NEXT: v_mov_b32_e32 v17, v4 +; MOVREL-NEXT: v_mov_b32_e32 v16, v3 +; MOVREL-NEXT: v_mov_b32_e32 v15, v2 +; MOVREL-NEXT: v_mov_b32_e32 v14, v1 +; MOVREL-NEXT: v_mov_b32_e32 v13, v0 +; MOVREL-NEXT: s_lshl_b32 m0, s1, 1 +; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, s1, v12 +; MOVREL-NEXT: v_movreld_b32_e32 v13, v10 +; MOVREL-NEXT: v_movreld_b32_e32 v14, v11 +; MOVREL-NEXT: s_and_saveexec_b32 vcc_lo, vcc_lo +; MOVREL-NEXT: s_xor_b32 exec_lo, exec_lo, vcc_lo +; MOVREL-NEXT: s_cbranch_execnz BB58_1 +; MOVREL-NEXT: ; %bb.2: +; MOVREL-NEXT: s_mov_b32 exec_lo, s0 +; MOVREL-NEXT: v_readfirstlane_b32 s0, v13 +; MOVREL-NEXT: v_readfirstlane_b32 s1, v14 +; MOVREL-NEXT: v_readfirstlane_b32 s2, v15 +; MOVREL-NEXT: v_readfirstlane_b32 s3, v16 +; MOVREL-NEXT: v_readfirstlane_b32 s4, v17 +; MOVREL-NEXT: v_readfirstlane_b32 s5, v18 +; MOVREL-NEXT: v_readfirstlane_b32 s6, v19 +; MOVREL-NEXT: v_readfirstlane_b32 s7, v20 +; MOVREL-NEXT: v_readfirstlane_b32 s8, v21 +; MOVREL-NEXT: v_readfirstlane_b32 s9, v22 +; MOVREL-NEXT: ; return to shader part epilog +entry: + %insert = insertelement <5 x double> %vec, double %val, i32 %idx + ret <5 x double> %insert +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.mir index 29e59cd4b29451..d010f7f4416045 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.mir @@ -15,7 +15,7 @@ body: | ; CHECK-LABEL: name: cos_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[V_COS_F32_e64_:%[0-9]+]]:vgpr_32 = V_COS_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_COS_F32_e64_:%[0-9]+]]:vgpr_32 = V_COS_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_COS_F32_e64_]] %0:sgpr(s32) = COPY 
$sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), %0 @@ -35,7 +35,7 @@ body: | ; CHECK-LABEL: name: cos_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: [[V_COS_F32_e64_:%[0-9]+]]:vgpr_32 = V_COS_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_COS_F32_e64_:%[0-9]+]]:vgpr_32 = V_COS_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_COS_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.cos), %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.s16.mir index a18242c3e44832..86b782b27e823d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cos.s16.mir @@ -17,7 +17,7 @@ body: | ; CHECK-LABEL: name: cos_s16_vs ; CHECK: liveins: $sgpr0 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[V_COS_F16_e64_:%[0-9]+]]:vgpr_32 = V_COS_F16_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_COS_F16_e64_:%[0-9]+]]:vgpr_32 = V_COS_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_COS_F16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 @@ -38,7 +38,7 @@ body: | ; CHECK-LABEL: name: cos_s16_vv ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: [[V_COS_F16_e64_:%[0-9]+]]:vgpr_32 = V_COS_F16_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_COS_F16_e64_:%[0-9]+]]:vgpr_32 = V_COS_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_COS_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir index bc987b0703758b..2eef865fc85bbf 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.cvt.pkrtz.mir @@ -15,7 +15,7 @@ body: | ; GCN: liveins: $sgpr0, $vgpr0 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_CVT_PKRTZ_F16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKRTZ_F16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GCN: [[V_CVT_PKRTZ_F16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKRTZ_F16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_CVT_PKRTZ_F16_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -37,7 +37,7 @@ body: | ; GCN: liveins: $sgpr0, $vgpr0 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[V_CVT_PKRTZ_F16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKRTZ_F16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GCN: [[V_CVT_PKRTZ_F16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKRTZ_F16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_CVT_PKRTZ_F16_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -58,7 +58,7 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: [[V_CVT_PKRTZ_F16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKRTZ_F16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GCN: [[V_CVT_PKRTZ_F16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKRTZ_F16_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_CVT_PKRTZ_F16_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir index b9f88557678a95..fe51eb9a393877 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmad.ftz.mir @@ -19,7 +19,7 @@ body: | ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GCN: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -43,7 +43,7 @@ body: | ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GCN: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -67,7 +67,7 @@ body: | ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GCN: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -92,7 +92,7 @@ body: | ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] - ; GCN: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY3]], 0, 
0, implicit $exec + ; GCN: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -117,7 +117,7 @@ body: | ; GCN: liveins: $sgpr0, $vgpr0 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GCN: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -140,7 +140,7 @@ body: | ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GCN: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GCN: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -163,7 +163,7 @@ body: | ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GCN: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, implicit $exec + ; GCN: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -185,7 +185,7 @@ body: | ; GCN: liveins: $sgpr0, $vgpr0 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GCN: 
[[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GCN: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmad.ftz), %0, %0, %0 @@ -226,7 +226,7 @@ body: | ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $exec + ; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_MAD_F32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.mir index 92f264f6b7ea74..13d268d68e942a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.mir @@ -16,7 +16,7 @@ body: | ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -40,7 +40,7 @@ body: | ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY1]], 0, 
[[COPY2]], 0, 0, implicit $exec + ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -64,7 +64,7 @@ body: | ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -88,7 +88,7 @@ body: | ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -113,7 +113,7 @@ body: | ; GCN: liveins: $sgpr0, $vgpr0 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -135,7 +135,7 @@ body: | ; GCN: liveins: $sgpr0, $vgpr0 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, 
[[COPY]], 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -157,7 +157,7 @@ body: | ; GCN: liveins: $sgpr0, $vgpr0 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY1]], 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY1]], 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -178,7 +178,7 @@ body: | ; GCN-LABEL: name: fmed3_s32_vsss ; GCN: liveins: $sgpr0, $vgpr0 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %0, %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir index c47565181cfcb5..7ffb251907707d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir @@ -21,7 +21,7 @@ body: | ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN: [[V_MED3_F16_:%[0-9]+]]:vgpr_32 = V_MED3_F16 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GCN: [[V_MED3_F16_:%[0-9]+]]:vgpr_32 = V_MED3_F16 0, 
[[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_MED3_F16_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -48,7 +48,7 @@ body: | ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: [[V_MED3_F16_:%[0-9]+]]:vgpr_32 = V_MED3_F16 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GCN: [[V_MED3_F16_:%[0-9]+]]:vgpr_32 = V_MED3_F16 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_MED3_F16_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.mir index f9e9978e9cadae..8d62c2c4904965 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.mir @@ -15,7 +15,7 @@ body: | ; CHECK-LABEL: name: fract_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[V_FRACT_F32_e64_:%[0-9]+]]:vgpr_32 = V_FRACT_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_FRACT_F32_e64_:%[0-9]+]]:vgpr_32 = V_FRACT_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_FRACT_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %0 @@ -35,7 +35,7 @@ body: | ; CHECK-LABEL: name: fract_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: [[V_FRACT_F32_e64_:%[0-9]+]]:vgpr_32 = V_FRACT_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_FRACT_F32_e64_:%[0-9]+]]:vgpr_32 = V_FRACT_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_FRACT_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %0 @@ 
-55,7 +55,7 @@ body: | ; CHECK-LABEL: name: fract_s64_vs ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[V_FRACT_F64_e64_:%[0-9]+]]:vreg_64 = V_FRACT_F64_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_FRACT_F64_e64_:%[0-9]+]]:vreg_64 = V_FRACT_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_FRACT_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %0 @@ -75,7 +75,7 @@ body: | ; CHECK-LABEL: name: fract_s64_vv ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK: [[V_FRACT_F64_e64_:%[0-9]+]]:vreg_64 = V_FRACT_F64_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_FRACT_F64_e64_:%[0-9]+]]:vreg_64 = V_FRACT_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_FRACT_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.s16.mir index ecae749c163b33..8360aee9a83e0c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fract.s16.mir @@ -5,6 +5,8 @@ # SI-ERR: remark: :0:0: cannot select: %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %1:sgpr(s16) (in function: fract_s16_vs) # SI-ERR: remark: :0:0: cannot select: %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), %1:vgpr(s16) (in function: fract_s16_vv) +--- + name: fract_s16_vs legalized: true regBankSelected: true @@ -17,7 +19,7 @@ body: | ; CHECK-LABEL: name: fract_s16_vs ; CHECK: liveins: $sgpr0 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[V_FRACT_F16_e64_:%[0-9]+]]:vgpr_32 = V_FRACT_F16_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_FRACT_F16_e64_:%[0-9]+]]:vgpr_32 = 
V_FRACT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_FRACT_F16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 @@ -38,7 +40,7 @@ body: | ; CHECK-LABEL: name: fract_s16_vv ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: [[V_FRACT_F16_e64_:%[0-9]+]]:vgpr_32 = V_FRACT_F16_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_FRACT_F16_e64_:%[0-9]+]]:vgpr_32 = V_FRACT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_FRACT_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.mir index 7371dbb998ffbe..0fc2582f983f18 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.mir @@ -14,7 +14,7 @@ body: | ; GCN: liveins: $sgpr0, $vgpr0 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_LDEXP_F32_e64_:%[0-9]+]]:vgpr_32 = V_LDEXP_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GCN: [[V_LDEXP_F32_e64_:%[0-9]+]]:vgpr_32 = V_LDEXP_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_LDEXP_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -35,7 +35,7 @@ body: | ; GCN: liveins: $sgpr0, $vgpr0 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[V_LDEXP_F32_e64_:%[0-9]+]]:vgpr_32 = V_LDEXP_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GCN: [[V_LDEXP_F32_e64_:%[0-9]+]]:vgpr_32 = V_LDEXP_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_LDEXP_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -56,7 +56,7 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1 ; 
GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: [[V_LDEXP_F32_e64_:%[0-9]+]]:vgpr_32 = V_LDEXP_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GCN: [[V_LDEXP_F32_e64_:%[0-9]+]]:vgpr_32 = V_LDEXP_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_LDEXP_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -77,7 +77,7 @@ body: | ; GCN: liveins: $sgpr0_sgpr1, $vgpr0 ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_LDEXP_F64_:%[0-9]+]]:vreg_64 = V_LDEXP_F64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GCN: [[V_LDEXP_F64_:%[0-9]+]]:vreg_64 = V_LDEXP_F64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_LDEXP_F64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s32) = COPY $vgpr0 @@ -98,7 +98,7 @@ body: | ; GCN: liveins: $sgpr0_sgpr1, $vgpr0 ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[V_LDEXP_F64_:%[0-9]+]]:vreg_64 = V_LDEXP_F64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GCN: [[V_LDEXP_F64_:%[0-9]+]]:vreg_64 = V_LDEXP_F64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_LDEXP_F64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:sgpr(s32) = COPY $sgpr0 @@ -119,7 +119,7 @@ body: | ; GCN: liveins: $vgpr0_vgpr1, $vgpr2 ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN: [[V_LDEXP_F64_:%[0-9]+]]:vreg_64 = V_LDEXP_F64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GCN: [[V_LDEXP_F64_:%[0-9]+]]:vreg_64 = V_LDEXP_F64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_LDEXP_F64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.s16.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.s16.mir index ad63aa85e9a08c..1a620849f33625 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.ldexp.s16.mir @@ -19,7 +19,7 @@ body: | ; GCN: liveins: $sgpr0, $vgpr0 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_LDEXP_F16_e64_:%[0-9]+]]:vgpr_32 = V_LDEXP_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GCN: [[V_LDEXP_F16_e64_:%[0-9]+]]:vgpr_32 = V_LDEXP_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_LDEXP_F16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -41,7 +41,7 @@ body: | ; GCN: liveins: $sgpr0, $vgpr0 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[V_LDEXP_F16_e64_:%[0-9]+]]:vgpr_32 = V_LDEXP_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GCN: [[V_LDEXP_F16_e64_:%[0-9]+]]:vgpr_32 = V_LDEXP_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_LDEXP_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -63,7 +63,7 @@ body: | ; GCN: liveins: $vgpr0, $vgpr1 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN: [[V_LDEXP_F16_e64_:%[0-9]+]]:vgpr_32 = V_LDEXP_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GCN: [[V_LDEXP_F16_e64_:%[0-9]+]]:vgpr_32 = V_LDEXP_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_LDEXP_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.legacy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.legacy.mir index def9d91830b882..3dbed8a7b5dd16 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.legacy.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.legacy.mir @@ -20,7 +20,7 @@ body: | ; CHECK-LABEL: name: rcp_legacy_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[V_RCP_LEGACY_F32_e64_:%[0-9]+]]:vgpr_32 = V_RCP_LEGACY_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_RCP_LEGACY_F32_e64_:%[0-9]+]]:vgpr_32 = V_RCP_LEGACY_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_RCP_LEGACY_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp.legacy), %0 @@ -40,7 +40,7 @@ body: | ; CHECK-LABEL: name: rcp_legacy_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: [[V_RCP_LEGACY_F32_e64_:%[0-9]+]]:vgpr_32 = V_RCP_LEGACY_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_RCP_LEGACY_F32_e64_:%[0-9]+]]:vgpr_32 = V_RCP_LEGACY_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_RCP_LEGACY_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp.legacy), %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.mir index 6dfdec77927304..fce88642940808 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.mir @@ -15,7 +15,7 @@ body: | ; CHECK-LABEL: name: rcp_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[V_RCP_F32_e64_:%[0-9]+]]:vgpr_32 = V_RCP_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_RCP_F32_e64_:%[0-9]+]]:vgpr_32 = V_RCP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_RCP_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %0 @@ -35,7 
+35,7 @@ body: | ; CHECK-LABEL: name: rcp_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: [[V_RCP_F32_e64_:%[0-9]+]]:vgpr_32 = V_RCP_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_RCP_F32_e64_:%[0-9]+]]:vgpr_32 = V_RCP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_RCP_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %0 @@ -55,7 +55,7 @@ body: | ; CHECK-LABEL: name: rcp_s64_vs ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[V_RCP_F64_e64_:%[0-9]+]]:vreg_64 = V_RCP_F64_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_RCP_F64_e64_:%[0-9]+]]:vreg_64 = V_RCP_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_RCP_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %0 @@ -75,7 +75,7 @@ body: | ; CHECK-LABEL: name: rcp_s64_vv ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK: [[V_RCP_F64_e64_:%[0-9]+]]:vreg_64 = V_RCP_F64_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_RCP_F64_e64_:%[0-9]+]]:vreg_64 = V_RCP_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_RCP_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.s16.mir index 90cf12ee37e7bb..c69890ae5c85d6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rcp.s16.mir @@ -5,6 +5,7 @@ # SI-ERR: remark: :0:0: cannot select: %2:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %1:sgpr(s16) (in function: rcp_s16_vs) # SI-ERR: remark: :0:0: cannot select: %2:vgpr(s16) = 
G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %1:vgpr(s16) (in function: rcp_s16_vv) +--- name: rcp_s16_vs legalized: true regBankSelected: true @@ -17,7 +18,7 @@ body: | ; CHECK-LABEL: name: rcp_s16_vs ; CHECK: liveins: $sgpr0 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[V_RCP_F16_e64_:%[0-9]+]]:vgpr_32 = V_RCP_F16_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_RCP_F16_e64_:%[0-9]+]]:vgpr_32 = V_RCP_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_RCP_F16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 @@ -38,7 +39,7 @@ body: | ; CHECK-LABEL: name: rcp_s16_vv ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: [[V_RCP_F16_e64_:%[0-9]+]]:vgpr_32 = V_RCP_F16_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_RCP_F16_e64_:%[0-9]+]]:vgpr_32 = V_RCP_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_RCP_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.clamp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.clamp.mir index 6e514d217b56f3..4b78bf341b52d9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.clamp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.clamp.mir @@ -20,7 +20,7 @@ body: | ; CHECK-LABEL: name: rsq_clamp_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[V_RSQ_CLAMP_F32_e64_:%[0-9]+]]:vgpr_32 = V_RSQ_CLAMP_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_RSQ_CLAMP_F32_e64_:%[0-9]+]]:vgpr_32 = V_RSQ_CLAMP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_RSQ_CLAMP_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.clamp), %0 @@ -40,7 +40,7 @@ body: | ; CHECK-LABEL: name: rsq_clamp_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: [[V_RSQ_CLAMP_F32_e64_:%[0-9]+]]:vgpr_32 = V_RSQ_CLAMP_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_RSQ_CLAMP_F32_e64_:%[0-9]+]]:vgpr_32 = V_RSQ_CLAMP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_RSQ_CLAMP_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.clamp), %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.legacy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.legacy.mir index 0df3078a0e7005..2c129b08c651bb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.legacy.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.legacy.mir @@ -20,7 +20,7 @@ body: | ; CHECK-LABEL: name: rsq_legacy_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[V_RSQ_LEGACY_F32_e64_:%[0-9]+]]:vgpr_32 = V_RSQ_LEGACY_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_RSQ_LEGACY_F32_e64_:%[0-9]+]]:vgpr_32 = V_RSQ_LEGACY_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_RSQ_LEGACY_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.legacy), %0 @@ -40,7 +40,7 @@ body: | ; CHECK-LABEL: name: rsq_legacy_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: [[V_RSQ_LEGACY_F32_e64_:%[0-9]+]]:vgpr_32 = V_RSQ_LEGACY_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_RSQ_LEGACY_F32_e64_:%[0-9]+]]:vgpr_32 = V_RSQ_LEGACY_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_RSQ_LEGACY_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq.legacy), %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.mir index 65fcb5deb44f27..4cf3fc5a9b4274 100644 
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.mir @@ -15,7 +15,7 @@ body: | ; CHECK-LABEL: name: rsq_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[V_RSQ_F32_e64_:%[0-9]+]]:vgpr_32 = V_RSQ_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_RSQ_F32_e64_:%[0-9]+]]:vgpr_32 = V_RSQ_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_RSQ_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %0 @@ -35,7 +35,7 @@ body: | ; CHECK-LABEL: name: rsq_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: [[V_RSQ_F32_e64_:%[0-9]+]]:vgpr_32 = V_RSQ_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_RSQ_F32_e64_:%[0-9]+]]:vgpr_32 = V_RSQ_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_RSQ_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %0 @@ -55,7 +55,7 @@ body: | ; CHECK-LABEL: name: rsq_s64_vs ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[V_RSQ_F64_e64_:%[0-9]+]]:vreg_64 = V_RSQ_F64_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_RSQ_F64_e64_:%[0-9]+]]:vreg_64 = V_RSQ_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_RSQ_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %0 @@ -75,7 +75,7 @@ body: | ; CHECK-LABEL: name: rsq_s64_vv ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK: [[V_RSQ_F64_e64_:%[0-9]+]]:vreg_64 = V_RSQ_F64_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_RSQ_F64_e64_:%[0-9]+]]:vreg_64 = V_RSQ_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_RSQ_F64_e64_]] %0:vgpr(s64) = COPY 
$vgpr0_vgpr1 %1:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.s16.mir index be12c84cf75e45..8620efd9fdcee6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.rsq.s16.mir @@ -17,7 +17,7 @@ body: | ; CHECK-LABEL: name: rsq_s16_vs ; CHECK: liveins: $sgpr0 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[V_RSQ_F16_e64_:%[0-9]+]]:vgpr_32 = V_RSQ_F16_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_RSQ_F16_e64_:%[0-9]+]]:vgpr_32 = V_RSQ_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_RSQ_F16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 @@ -38,7 +38,7 @@ body: | ; CHECK-LABEL: name: rsq_s16_vv ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: [[V_RSQ_F16_e64_:%[0-9]+]]:vgpr_32 = V_RSQ_F16_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_RSQ_F16_e64_:%[0-9]+]]:vgpr_32 = V_RSQ_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_RSQ_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.mir index b069bc7ead2909..90e586c6888b3e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.mir @@ -15,7 +15,7 @@ body: | ; CHECK-LABEL: name: sin_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[V_SIN_F32_e64_:%[0-9]+]]:vgpr_32 = V_SIN_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_SIN_F32_e64_:%[0-9]+]]:vgpr_32 = V_SIN_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit 
[[V_SIN_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), %0 @@ -35,7 +35,7 @@ body: | ; CHECK-LABEL: name: sin_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: [[V_SIN_F32_e64_:%[0-9]+]]:vgpr_32 = V_SIN_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_SIN_F32_e64_:%[0-9]+]]:vgpr_32 = V_SIN_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_SIN_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sin), %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.s16.mir index ff049d1be98f18..903ee4a9a040fb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.sin.s16.mir @@ -17,7 +17,7 @@ body: | ; CHECK-LABEL: name: sin_s16_vs ; CHECK: liveins: $sgpr0 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[V_SIN_F16_e64_:%[0-9]+]]:vgpr_32 = V_SIN_F16_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_SIN_F16_e64_:%[0-9]+]]:vgpr_32 = V_SIN_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_SIN_F16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 @@ -38,7 +38,7 @@ body: | ; CHECK-LABEL: name: sin_s16_vv ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: [[V_SIN_F16_e64_:%[0-9]+]]:vgpr_32 = V_SIN_F16_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_SIN_F16_e64_:%[0-9]+]]:vgpr_32 = V_SIN_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_SIN_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir index d8f2fad8f93819..54cb2a0ab0e05c 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir @@ -1,10 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -o - %s 2> %t | FileCheck -check-prefix=GCN %s -# RUN: FileCheck -check-prefix=ERR %s < %t +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -o - %s | FileCheck -check-prefix=GCN %s -# ERR-NOT: remark: -# ERR: remark: :0:0: cannot select: %2:sgpr(<6 x s64>) = G_CONCAT_VECTORS %0:sgpr(<3 x s64>), %1:sgpr(<3 x s64>) (in function: test_concat_vectors_s_v6s64_s_v3s64_s_v3s64) -# ERR-NOT: remark: --- name: test_concat_vectors_v_v4s16_v_v2s16_v_v2s16 @@ -634,10 +630,10 @@ body: | liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GCN-LABEL: name: test_concat_vectors_s_v6s64_s_v3s64_s_v3s64 - ; GCN: [[DEF:%[0-9]+]]:sgpr(<3 x s64>) = G_IMPLICIT_DEF - ; GCN: [[DEF1:%[0-9]+]]:sgpr(<3 x s64>) = G_IMPLICIT_DEF - ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<6 x s64>) = G_CONCAT_VECTORS [[DEF]](<3 x s64>), [[DEF1]](<3 x s64>) - ; GCN: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s64>) + ; GCN: [[DEF:%[0-9]+]]:sgpr_192 = IMPLICIT_DEF + ; GCN: [[DEF1:%[0-9]+]]:sgpr_192 = IMPLICIT_DEF + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[DEF]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5, [[DEF1]], %subreg.sub6_sub7_sub8_sub9_sub10_sub11 + ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(<3 x s64>) = G_IMPLICIT_DEF %1:sgpr(<3 x s64>) = G_IMPLICIT_DEF %2:sgpr(<6 x s64>) = G_CONCAT_VECTORS %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s16.mir index c94e0665da35b4..ed510864f3bbf7 100644 
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s16.mir @@ -13,7 +13,7 @@ body: | ; GFX8-LABEL: name: fadd_s16_vvv ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GFX8: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX8: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -36,7 +36,7 @@ body: | ; GFX8-LABEL: name: fadd_s16_vsv ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GFX8: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX8: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -59,7 +59,7 @@ body: | ; GFX8-LABEL: name: fadd_s16_vvs ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GFX8: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX8: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -82,7 +82,7 @@ body: | ; GFX8-LABEL: name: fadd_s16_vvv_fabs_lhs ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F16_e64 2, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GFX8: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F16_e64 2, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit 
$exec ; GFX8: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -106,7 +106,7 @@ body: | ; GFX8-LABEL: name: fadd_s16_vvv_fabs_rhs ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F16_e64 0, [[COPY]], 2, [[COPY1]], 0, 0, implicit $exec + ; GFX8: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F16_e64 0, [[COPY]], 2, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX8: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -130,7 +130,7 @@ body: | ; GFX8-LABEL: name: fadd_s16_vvv_fneg_fabs_lhs ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F16_e64 3, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GFX8: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F16_e64 3, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX8: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -155,7 +155,7 @@ body: | ; GFX8-LABEL: name: fadd_s16_vvv_fneg_fabs_rhs ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F16_e64 0, [[COPY]], 3, [[COPY1]], 0, 0, implicit $exec + ; GFX8: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F16_e64 0, [[COPY]], 3, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX8: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -180,7 +180,7 @@ body: | ; GFX8-LABEL: name: fadd_s16_fneg_copy_sgpr ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX8: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $exec + ; GFX8: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit 
$mode, implicit $exec ; GFX8: S_ENDPGM 0, implicit [[V_ADD_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s32.mir index 064e06a684c3f1..65482f3cee967a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s32.mir @@ -13,7 +13,7 @@ body: | ; GFX6-LABEL: name: fadd_s32_vvv ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GFX6: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -34,7 +34,7 @@ body: | ; GFX6-LABEL: name: fadd_s32_vsv ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GFX6: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -55,7 +55,7 @@ body: | ; GFX6-LABEL: name: fadd_s32_vvs ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GFX6: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -76,7 +76,7 @@ body: | ; GFX6-LABEL: name: fadd_s32_vvv_fabs_lhs ; GFX6: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 2, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GFX6: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 2, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -97,7 +97,7 @@ body: | liveins: $vgpr0, $vgpr1 ; GFX6-LABEL: name: fadd_s32_vvv_fabs_rhs ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 0, [[COPY]], 2, [[COPY]], 0, 0, implicit $exec + ; GFX6: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 0, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -119,7 +119,7 @@ body: | ; GFX6-LABEL: name: fadd_s32_vvv_fneg_fabs_lhs ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 3, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GFX6: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 3, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -141,7 +141,7 @@ body: | liveins: $vgpr0, $vgpr1 ; GFX6-LABEL: name: fadd_s32_vvv_fneg_fabs_rhs ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 0, [[COPY]], 3, [[COPY]], 0, 0, implicit $exec + ; GFX6: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 0, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -166,7 +166,7 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY 
$sgpr0 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX6: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 0, [[COPY]], 1, [[COPY2]], 0, 0, implicit $exec + ; GFX6: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 0, [[COPY]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -191,7 +191,7 @@ body: | ; GFX6-LABEL: name: fadd_s32_copy_fneg_copy_fabs ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GFX6: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 0, [[COPY]], 3, [[COPY1]], 0, 0, implicit $exec + ; GFX6: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 0, [[COPY]], 3, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:sgpr(s32) = COPY $sgpr0 @@ -222,7 +222,7 @@ body: | ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX6: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 2, [[COPY2]], 2, [[COPY3]], 0, 0, implicit $exec + ; GFX6: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 2, [[COPY2]], 2, [[COPY3]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -249,7 +249,7 @@ body: | ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX6: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 1, [[COPY2]], 1, [[COPY3]], 0, 0, implicit $exec + ; GFX6: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 1, [[COPY2]], 1, [[COPY3]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -276,7 +276,7 @@ body: | ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = 
COPY $sgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX6: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 3, [[COPY2]], 3, [[COPY3]], 0, 0, implicit $exec + ; GFX6: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 3, [[COPY2]], 3, [[COPY3]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s64.mir index 0525e5ecc15c58..b4b9e2ce1385e7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fadd.s64.mir @@ -13,7 +13,7 @@ body: | ; GFX6-LABEL: name: fadd_s64_vvv ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6: [[V_ADD_F64_:%[0-9]+]]:vreg_64 = V_ADD_F64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GFX6: [[V_ADD_F64_:%[0-9]+]]:vreg_64 = V_ADD_F64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_F64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -34,7 +34,7 @@ body: | ; GFX6-LABEL: name: fadd_s64_vsv ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6: [[V_ADD_F64_:%[0-9]+]]:vreg_64 = V_ADD_F64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GFX6: [[V_ADD_F64_:%[0-9]+]]:vreg_64 = V_ADD_F64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_F64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = COPY $vgpr0_vgpr1 @@ -55,7 +55,7 @@ body: | ; GFX6-LABEL: name: fadd_s64_vvs ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX6: [[V_ADD_F64_:%[0-9]+]]:vreg_64 = V_ADD_F64 0, [[COPY]], 0, [[COPY1]], 
0, 0, implicit $exec + ; GFX6: [[V_ADD_F64_:%[0-9]+]]:vreg_64 = V_ADD_F64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_F64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:sgpr(s64) = COPY $sgpr0_sgpr1 @@ -76,7 +76,7 @@ body: | ; GFX6-LABEL: name: fadd_s64_vvv_fabs_lhs ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6: [[V_ADD_F64_:%[0-9]+]]:vreg_64 = V_ADD_F64 2, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GFX6: [[V_ADD_F64_:%[0-9]+]]:vreg_64 = V_ADD_F64 2, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_F64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -97,7 +97,7 @@ body: | liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-LABEL: name: fadd_s64_vvv_fabs_rhs ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6: [[V_ADD_F64_:%[0-9]+]]:vreg_64 = V_ADD_F64 0, [[COPY]], 2, [[COPY]], 0, 0, implicit $exec + ; GFX6: [[V_ADD_F64_:%[0-9]+]]:vreg_64 = V_ADD_F64 0, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_F64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -119,7 +119,7 @@ body: | ; GFX6-LABEL: name: fadd_s64_vvv_fneg_fabs_lhs ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX6: [[V_ADD_F64_:%[0-9]+]]:vreg_64 = V_ADD_F64 3, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GFX6: [[V_ADD_F64_:%[0-9]+]]:vreg_64 = V_ADD_F64 3, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_F64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -141,7 +141,7 @@ body: | liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX6-LABEL: name: fadd_s64_vvv_fneg_fabs_rhs ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX6: [[V_ADD_F64_:%[0-9]+]]:vreg_64 = V_ADD_F64 0, [[COPY]], 3, [[COPY]], 0, 0, 
implicit $exec + ; GFX6: [[V_ADD_F64_:%[0-9]+]]:vreg_64 = V_ADD_F64 0, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_F64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr0_vgpr1 @@ -167,7 +167,7 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY1]] - ; GFX6: [[V_ADD_F64_:%[0-9]+]]:vreg_64 = V_ADD_F64 0, [[COPY]], 1, [[COPY2]], 0, 0, implicit $exec + ; GFX6: [[V_ADD_F64_:%[0-9]+]]:vreg_64 = V_ADD_F64 0, [[COPY]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_F64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:sgpr(s64) = COPY $sgpr0_sgpr1 @@ -196,7 +196,7 @@ body: | ; GFX6: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; GFX6: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]] ; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY1]] - ; GFX6: [[V_ADD_F64_:%[0-9]+]]:vreg_64 = V_ADD_F64 2, [[COPY2]], 2, [[COPY3]], 0, 0, implicit $exec + ; GFX6: [[V_ADD_F64_:%[0-9]+]]:vreg_64 = V_ADD_F64 2, [[COPY2]], 2, [[COPY3]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_F64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 @@ -223,7 +223,7 @@ body: | ; GFX6: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; GFX6: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]] ; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY1]] - ; GFX6: [[V_ADD_F64_:%[0-9]+]]:vreg_64 = V_ADD_F64 1, [[COPY2]], 1, [[COPY3]], 0, 0, implicit $exec + ; GFX6: [[V_ADD_F64_:%[0-9]+]]:vreg_64 = V_ADD_F64 1, [[COPY2]], 1, [[COPY3]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_F64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 @@ -250,7 +250,7 @@ body: | ; GFX6: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 ; GFX6: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]] ; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY1]] - ; GFX6: 
[[V_ADD_F64_:%[0-9]+]]:vreg_64 = V_ADD_F64 3, [[COPY2]], 3, [[COPY3]], 0, 0, implicit $exec + ; GFX6: [[V_ADD_F64_:%[0-9]+]]:vreg_64 = V_ADD_F64 3, [[COPY2]], 3, [[COPY3]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_ADD_F64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir index 75086984a142d7..7bf63ebfa1fa32 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir @@ -17,11 +17,11 @@ body: | liveins: $vgpr0 ; GFX8-LABEL: name: fcanonicalize_f16_denorm ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec + ; GFX8: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX8: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] ; GFX9-LABEL: name: fcanonicalize_f16_denorm ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec + ; GFX9: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX9: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 @@ -44,11 +44,11 @@ body: | liveins: $vgpr0 ; GFX8-LABEL: name: fcanonicalize_f16_flush ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[V_MUL_F16_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F16_e64 0, 15360, 0, [[COPY]], 0, 0, implicit $exec + ; GFX8: [[V_MUL_F16_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F16_e64 0, 15360, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX8: S_ENDPGM 0, implicit [[V_MUL_F16_e64_]] ; GFX9-LABEL: name: fcanonicalize_f16_flush ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; 
GFX9: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec + ; GFX9: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX9: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 @@ -72,11 +72,11 @@ body: | ; GFX8-LABEL: name: fcanonicalize_f32_denorm ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 0, [[COPY]], 0, 0, implicit $exec + ; GFX8: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX8: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9-LABEL: name: fcanonicalize_f32_denorm ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec + ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX9: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FCANONICALIZE %0 @@ -99,11 +99,11 @@ body: | ; GFX8-LABEL: name: fcanonicalize_f32_flush ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 0, [[COPY]], 0, 0, implicit $exec + ; GFX8: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX8: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9-LABEL: name: fcanonicalize_f32_flush ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec + ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX9: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FCANONICALIZE 
%0 @@ -126,11 +126,11 @@ body: | ; GFX8-LABEL: name: fcanonicalize_v2f16_denorm ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX8: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec ; GFX8: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] ; GFX9-LABEL: name: fcanonicalize_v2f16_denorm ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec ; GFX9: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_FCANONICALIZE %0 @@ -153,11 +153,11 @@ body: | ; GFX8-LABEL: name: fcanonicalize_v2f16_flush ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = V_PK_MUL_F16 0, 15360, 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX8: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = V_PK_MUL_F16 0, 15360, 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec ; GFX8: S_ENDPGM 0, implicit [[V_PK_MUL_F16_]] ; GFX9-LABEL: name: fcanonicalize_v2f16_flush ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec ; GFX9: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = G_FCANONICALIZE %0 @@ -180,11 +180,11 @@ body: | ; GFX8-LABEL: name: fcanonicalize_f64_denorm ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: [[V_MAX_F64_:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec + ; GFX8: 
[[V_MAX_F64_:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX8: S_ENDPGM 0, implicit [[V_MAX_F64_]] ; GFX9-LABEL: name: fcanonicalize_f64_denorm ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[V_MAX_F64_:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec + ; GFX9: [[V_MAX_F64_:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX9: S_ENDPGM 0, implicit [[V_MAX_F64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FCANONICALIZE %0 @@ -207,11 +207,11 @@ body: | ; GFX8-LABEL: name: fcanonicalize_f64_flush ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX8: [[V_MUL_F64_:%[0-9]+]]:vreg_64 = V_MUL_F64 0, 4607182418800017408, 0, [[COPY]], 0, 0, implicit $exec + ; GFX8: [[V_MUL_F64_:%[0-9]+]]:vreg_64 = V_MUL_F64 0, 4607182418800017408, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX8: S_ENDPGM 0, implicit [[V_MUL_F64_]] ; GFX9-LABEL: name: fcanonicalize_f64_flush ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[V_MAX_F64_:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec + ; GFX9: [[V_MAX_F64_:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX9: S_ENDPGM 0, implicit [[V_MAX_F64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FCANONICALIZE %0 @@ -233,11 +233,11 @@ body: | liveins: $vgpr0 ; GFX8-LABEL: name: fcanonicalize_fabs_f32_denorm ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 2, [[COPY]], 0, 0, implicit $exec + ; GFX8: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 2, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX8: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9-LABEL: name: fcanonicalize_fabs_f32_denorm ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 2, 
[[COPY]], 2, [[COPY]], 0, 0, implicit $exec + ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX9: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FABS %0 @@ -261,11 +261,11 @@ body: | liveins: $vgpr0 ; GFX8-LABEL: name: fcanonicalize_fabs_f32_flush ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 2, [[COPY]], 0, 0, implicit $exec + ; GFX8: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 2, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX8: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9-LABEL: name: fcanonicalize_fabs_f32_flush ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $exec + ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX9: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FABS %0 @@ -288,11 +288,11 @@ body: | liveins: $vgpr0 ; GFX8-LABEL: name: fcanonicalize_fneg_f32_denorm ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 3212836864, 0, [[COPY]], 0, 0, implicit $exec + ; GFX8: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 3212836864, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX8: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9-LABEL: name: fcanonicalize_fneg_f32_denorm ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $exec + ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX9: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 @@ -315,11 +315,11 @@ 
body: | liveins: $vgpr0 ; GFX8-LABEL: name: fcanonicalize_fneg_f32_flush ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 3212836864, 0, [[COPY]], 0, 0, implicit $exec + ; GFX8: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 3212836864, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX8: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9-LABEL: name: fcanonicalize_fneg_f32_flush ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $exec + ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GFX9: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 @@ -344,13 +344,13 @@ body: | ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; GFX8: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX8: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e32_]], 0, 0, implicit $exec + ; GFX8: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e32_]], 0, 0, implicit $mode, implicit $exec ; GFX8: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9-LABEL: name: fcanonicalize_fneg_fabs_f32_denorm ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; GFX9: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 2, [[V_XOR_B32_e32_]], 2, [[V_XOR_B32_e32_]], 0, 0, implicit $exec + ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 2, [[V_XOR_B32_e32_]], 2, [[V_XOR_B32_e32_]], 0, 0, implicit $mode, implicit $exec ; GFX9: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) 
= G_FNEG %0 @@ -376,13 +376,13 @@ body: | ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; GFX8: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX8: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e32_]], 0, 0, implicit $exec + ; GFX8: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e32_]], 0, 0, implicit $mode, implicit $exec ; GFX8: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] ; GFX9-LABEL: name: fcanonicalize_fneg_fabs_f32_flush ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; GFX9: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 2, [[V_XOR_B32_e32_]], 2, [[V_XOR_B32_e32_]], 0, 0, implicit $exec + ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 2, [[V_XOR_B32_e32_]], 2, [[V_XOR_B32_e32_]], 0, 0, implicit $mode, implicit $exec ; GFX9: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.mir index 70c5b76d758fc0..fdf6dcfb8d1068 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.mir @@ -14,7 +14,7 @@ body: | ; CHECK-LABEL: name: fceil_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: [[V_CEIL_F32_e64_:%[0-9]+]]:vgpr_32 = V_CEIL_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_CEIL_F32_e64_:%[0-9]+]]:vgpr_32 = V_CEIL_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: $vgpr0 = COPY [[V_CEIL_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FCEIL %0 @@ -34,7 +34,7 @@ body: | ; CHECK-LABEL: name: fceil_s32_vs ; 
CHECK: liveins: $sgpr0 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[V_CEIL_F32_e64_:%[0-9]+]]:vgpr_32 = V_CEIL_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_CEIL_F32_e64_:%[0-9]+]]:vgpr_32 = V_CEIL_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: $vgpr0 = COPY [[V_CEIL_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_FCEIL %0 @@ -54,7 +54,7 @@ body: | ; CHECK-LABEL: name: fceil_s64_sv ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[V_CEIL_F64_e64_:%[0-9]+]]:vreg_64 = V_CEIL_F64_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_CEIL_F64_e64_:%[0-9]+]]:vreg_64 = V_CEIL_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: $vgpr0_vgpr1 = COPY [[V_CEIL_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = G_FCEIL %0 @@ -74,7 +74,7 @@ body: | ; CHECK-LABEL: name: fceil_s64_vv ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK: [[V_CEIL_F64_e64_:%[0-9]+]]:vreg_64 = V_CEIL_F64_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_CEIL_F64_e64_:%[0-9]+]]:vreg_64 = V_CEIL_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: $vgpr0_vgpr1 = COPY [[V_CEIL_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FCEIL %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir index 92b615e8cf6eb9..75a78190e62e09 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir @@ -38,7 +38,7 @@ body: | ; GCN-LABEL: name: fceil_s16_vv ; GCN: liveins: $vgpr0 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_CEIL_F16_e64_:%[0-9]+]]:vgpr_32 = V_CEIL_F16_e64 0, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_CEIL_F16_e64_:%[0-9]+]]:vgpr_32 = V_CEIL_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN: $vgpr0 = COPY 
[[V_CEIL_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 @@ -60,7 +60,7 @@ body: | ; GCN-LABEL: name: fceil_s16_vs ; GCN: liveins: $sgpr0 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[V_CEIL_F16_e64_:%[0-9]+]]:vgpr_32 = V_CEIL_F16_e64 0, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_CEIL_F16_e64_:%[0-9]+]]:vgpr_32 = V_CEIL_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN: $vgpr0 = COPY [[V_CEIL_F16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 @@ -82,7 +82,7 @@ body: | ; GCN-LABEL: name: fceil_fneg_s16_vv ; GCN: liveins: $vgpr0 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_CEIL_F16_e64_:%[0-9]+]]:vgpr_32 = V_CEIL_F16_e64 1, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_CEIL_F16_e64_:%[0-9]+]]:vgpr_32 = V_CEIL_F16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN: $vgpr0 = COPY [[V_CEIL_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.mir index 74f9154bd96639..c052f484bff3ea 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.mir @@ -37,13 +37,13 @@ body: | ; WAVE64-LABEL: name: fcmp_oeq_s32_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_EQ_F32_e64_]] ; WAVE32-LABEL: name: fcmp_oeq_s32_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: 
[[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_EQ_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -62,13 +62,13 @@ body: | ; WAVE64-LABEL: name: fcmp_ogt_s32_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_GT_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_GT_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_GT_F32_e64_]] ; WAVE32-LABEL: name: fcmp_ogt_s32_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_GT_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_GT_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_GT_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -87,13 +87,13 @@ body: | ; WAVE64-LABEL: name: fcmp_oge_s32_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_GE_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_GE_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_GE_F32_e64_]] ; WAVE32-LABEL: name: fcmp_oge_s32_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_GE_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: 
[[V_CMP_GE_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_GE_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -112,13 +112,13 @@ body: | ; WAVE64-LABEL: name: fcmp_olt_s32_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_LT_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_LT_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_LT_F32_e64_]] ; WAVE32-LABEL: name: fcmp_olt_s32_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_LT_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_LT_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_LT_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -137,13 +137,13 @@ body: | ; WAVE64-LABEL: name: fcmp_ole_s32_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_LE_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_LE_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_LE_F32_e64_]] ; WAVE32-LABEL: name: fcmp_ole_s32_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_LE_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: 
[[V_CMP_LE_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_LE_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -162,13 +162,13 @@ body: | ; WAVE64-LABEL: name: fcmp_one_s32_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_LG_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_LG_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_LG_F32_e64_]] ; WAVE32-LABEL: name: fcmp_one_s32_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_LG_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_LG_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_LG_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -187,13 +187,13 @@ body: | ; WAVE64-LABEL: name: fcmp_ord_s32_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_O_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_O_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_O_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_O_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_O_F32_e64_]] ; WAVE32-LABEL: name: fcmp_ord_s32_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_O_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_O_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_O_F32_e64_:%[0-9]+]]:sreg_32 
= V_CMP_O_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_O_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -212,13 +212,13 @@ body: | ; WAVE64-LABEL: name: fcmp_uno_s32_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_U_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_U_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_U_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_U_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_U_F32_e64_]] ; WAVE32-LABEL: name: fcmp_uno_s32_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_U_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_U_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_U_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_U_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_U_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -237,13 +237,13 @@ body: | ; WAVE64-LABEL: name: fcmp_ueq_s32_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_NLG_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_NLG_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NLG_F32_e64_]] ; WAVE32-LABEL: name: fcmp_ueq_s32_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_NLG_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NLG_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_NLG_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NLG_F32_e64 0, [[COPY]], 
0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NLG_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -262,13 +262,13 @@ body: | ; WAVE64-LABEL: name: fcmp_ugt_s32_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_NLE_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_NLE_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NLE_F32_e64_]] ; WAVE32-LABEL: name: fcmp_ugt_s32_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_NLE_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NLE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_NLE_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NLE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NLE_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -287,13 +287,13 @@ body: | ; WAVE64-LABEL: name: fcmp_uge_s32_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_NLT_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_NLT_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NLT_F32_e64_]] ; WAVE32-LABEL: name: fcmp_uge_s32_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_NLT_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NLT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_NLT_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NLT_F32_e64 0, [[COPY]], 0, 
[[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NLT_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -312,13 +312,13 @@ body: | ; WAVE64-LABEL: name: fcmp_ult_s32_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_NGE_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NGE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_NGE_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NGE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NGE_F32_e64_]] ; WAVE32-LABEL: name: fcmp_ult_s32_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_NGE_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NGE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_NGE_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NGE_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NGE_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -337,13 +337,13 @@ body: | ; WAVE64-LABEL: name: fcmp_ule_s32_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_NGT_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NGT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_NGT_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NGT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NGT_F32_e64_]] ; WAVE32-LABEL: name: fcmp_ule_s32_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_NGT_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NGT_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_NGT_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NGT_F32_e64 0, [[COPY]], 0, 
[[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NGT_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -362,13 +362,13 @@ body: | ; WAVE64-LABEL: name: fcmp_une_s32_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_NEQ_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NEQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_NEQ_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NEQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NEQ_F32_e64_]] ; WAVE32-LABEL: name: fcmp_une_s32_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_NEQ_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NEQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_NEQ_F32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NEQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NEQ_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -435,13 +435,13 @@ body: | ; WAVE64-LABEL: name: fcmp_oeq_s64_vv ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64: [[V_CMP_EQ_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_EQ_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_EQ_F64_e64_]] ; WAVE32-LABEL: name: fcmp_oeq_s64_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32: [[V_CMP_EQ_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_EQ_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_F64_e64 0, 
[[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_EQ_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -460,13 +460,13 @@ body: | ; WAVE64-LABEL: name: fcmp_ogt_s64_vv ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64: [[V_CMP_GT_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_GT_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_GT_F64_e64_]] ; WAVE32-LABEL: name: fcmp_ogt_s64_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32: [[V_CMP_GT_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_GT_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_GT_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -485,13 +485,13 @@ body: | ; WAVE64-LABEL: name: fcmp_oge_s64_vv ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64: [[V_CMP_GE_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_GE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_GE_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_GE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_GE_F64_e64_]] ; WAVE32-LABEL: name: fcmp_oge_s64_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32: [[V_CMP_GE_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_GE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: 
[[V_CMP_GE_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_GE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_GE_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -510,13 +510,13 @@ body: | ; WAVE64-LABEL: name: fcmp_olt_s64_vv ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64: [[V_CMP_LT_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_LT_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_LT_F64_e64_]] ; WAVE32-LABEL: name: fcmp_olt_s64_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32: [[V_CMP_LT_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_LT_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_LT_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -535,13 +535,13 @@ body: | ; WAVE64-LABEL: name: fcmp_ole_s64_vv ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64: [[V_CMP_LE_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_LE_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_LE_F64_e64_]] ; WAVE32-LABEL: name: fcmp_ole_s64_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32: [[V_CMP_LE_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_F64_e64 0, [[COPY]], 
0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_LE_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_LE_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -560,13 +560,13 @@ body: | ; WAVE64-LABEL: name: fcmp_one_s64_vv ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64: [[V_CMP_LG_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_LG_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_LG_F64_e64_]] ; WAVE32-LABEL: name: fcmp_one_s64_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32: [[V_CMP_LG_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_LG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_LG_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_LG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_LG_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -585,13 +585,13 @@ body: | ; WAVE64-LABEL: name: fcmp_ord_s64_vv ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64: [[V_CMP_O_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_O_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_O_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_O_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_O_F64_e64_]] ; WAVE32-LABEL: name: fcmp_ord_s64_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32: 
[[V_CMP_O_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_O_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_O_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_O_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_O_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -610,13 +610,13 @@ body: | ; WAVE64-LABEL: name: fcmp_uno_s64_vv ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64: [[V_CMP_U_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_U_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_U_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_U_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_U_F64_e64_]] ; WAVE32-LABEL: name: fcmp_uno_s64_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32: [[V_CMP_U_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_U_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_U_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_U_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_U_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -635,13 +635,13 @@ body: | ; WAVE64-LABEL: name: fcmp_ueq_s64_vv ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64: [[V_CMP_NLG_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_NLG_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NLG_F64_e64_]] ; WAVE32-LABEL: name: fcmp_ueq_s64_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 
= COPY $vgpr2_vgpr3 - ; WAVE32: [[V_CMP_NLG_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_NLG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_NLG_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_NLG_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NLG_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -660,13 +660,13 @@ body: | ; WAVE64-LABEL: name: fcmp_ugt_s64_vv ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64: [[V_CMP_NLE_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_NLE_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NLE_F64_e64_]] ; WAVE32-LABEL: name: fcmp_ugt_s64_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32: [[V_CMP_NLE_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_NLE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_NLE_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_NLE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NLE_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -685,13 +685,13 @@ body: | ; WAVE64-LABEL: name: fcmp_uge_s64_vv ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64: [[V_CMP_NLT_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_NLT_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NLT_F64_e64_]] ; WAVE32-LABEL: name: fcmp_uge_s64_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: 
[[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32: [[V_CMP_NLT_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_NLT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_NLT_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_NLT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NLT_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -710,13 +710,13 @@ body: | ; WAVE64-LABEL: name: fcmp_ult_s64_vv ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64: [[V_CMP_NGE_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NGE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_NGE_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NGE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NGE_F64_e64_]] ; WAVE32-LABEL: name: fcmp_ult_s64_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32: [[V_CMP_NGE_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_NGE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_NGE_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_NGE_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NGE_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -735,13 +735,13 @@ body: | ; WAVE64-LABEL: name: fcmp_ule_s64_vv ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64: [[V_CMP_NGT_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NGT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_NGT_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NGT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NGT_F64_e64_]] ; WAVE32-LABEL: 
name: fcmp_ule_s64_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32: [[V_CMP_NGT_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_NGT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_NGT_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_NGT_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NGT_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -760,13 +760,13 @@ body: | ; WAVE64-LABEL: name: fcmp_une_s64_vv ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE64: [[V_CMP_NEQ_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NEQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_NEQ_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_NEQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NEQ_F64_e64_]] ; WAVE32-LABEL: name: fcmp_une_s64_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; WAVE32: [[V_CMP_NEQ_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_NEQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_NEQ_F64_e64_:%[0-9]+]]:sreg_32 = V_CMP_NEQ_F64_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NEQ_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 @@ -809,14 +809,14 @@ body: | ; WAVE64-LABEL: name: fcmp_oeq_s32_vv_select_user ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec 
; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_CMP_EQ_F32_e64_]], implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] ; WAVE32-LABEL: name: fcmp_oeq_s32_vv_select_user ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_CMP_EQ_F32_e64_]], implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] %0:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir index 42b017b409a59f..a0354d84039325 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcmp.s16.mir @@ -43,13 +43,13 @@ body: | ; WAVE64-LABEL: name: fcmp_oeq_s16_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_EQ_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_EQ_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_EQ_F16_e64_]] ; WAVE32-LABEL: name: fcmp_oeq_s16_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_EQ_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_EQ_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_F16_e64 0, [[COPY]], 0, 
[[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_EQ_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -70,13 +70,13 @@ body: | ; WAVE64-LABEL: name: fcmp_ogt_s16_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_GT_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_GT_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_GT_F16_e64_]] ; WAVE32-LABEL: name: fcmp_ogt_s16_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_GT_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_GT_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_GT_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -97,13 +97,13 @@ body: | ; WAVE64-LABEL: name: fcmp_oge_s16_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_GE_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_GE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_GE_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_GE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_GE_F16_e64_]] ; WAVE32-LABEL: name: fcmp_oge_s16_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_GE_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_GE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_GE_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_GE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, 
implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_GE_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -124,13 +124,13 @@ body: | ; WAVE64-LABEL: name: fcmp_olt_s16_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_LT_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_LT_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_LT_F16_e64_]] ; WAVE32-LABEL: name: fcmp_olt_s16_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_LT_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_LT_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_LT_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -151,13 +151,13 @@ body: | ; WAVE64-LABEL: name: fcmp_ole_s16_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_LE_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_LE_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_LE_F16_e64_]] ; WAVE32-LABEL: name: fcmp_ole_s16_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_LE_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_LE_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: 
S_ENDPGM 0, implicit [[V_CMP_LE_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -177,13 +177,13 @@ body: | ; WAVE64-LABEL: name: fcmp_one_s16_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_LG_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_LG_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_LG_F16_e64_]] ; WAVE32-LABEL: name: fcmp_one_s16_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_LG_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_LG_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_LG_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -204,13 +204,13 @@ body: | ; WAVE64-LABEL: name: fcmp_ord_s16_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_LG_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_LG_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_LG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_LG_F16_e64_]] ; WAVE32-LABEL: name: fcmp_ord_s16_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_LG_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_LG_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit 
[[V_CMP_LG_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -231,13 +231,13 @@ body: | ; WAVE64-LABEL: name: fcmp_uno_s16_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_U_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_U_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_U_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_U_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_U_F16_e64_]] ; WAVE32-LABEL: name: fcmp_uno_s16_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_U_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_U_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_U_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_U_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_U_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -258,13 +258,13 @@ body: | ; WAVE64-LABEL: name: fcmp_ueq_s16_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_NLG_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_NLG_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NLG_F16_e64_]] ; WAVE32-LABEL: name: fcmp_ueq_s16_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_NLG_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NLG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_NLG_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NLG_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NLG_F16_e64_]] 
%0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -285,13 +285,13 @@ body: | ; WAVE64-LABEL: name: fcmp_ugt_s16_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_NLE_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_NLE_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NLE_F16_e64_]] ; WAVE32-LABEL: name: fcmp_ugt_s16_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_NLE_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NLE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_NLE_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NLE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NLE_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -312,13 +312,13 @@ body: | ; WAVE64-LABEL: name: fcmp_uge_s16_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_NLT_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_NLT_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NLT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NLT_F16_e64_]] ; WAVE32-LABEL: name: fcmp_uge_s16_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_NLT_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NLT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_NLT_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NLT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NLT_F16_e64_]] 
%0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -339,13 +339,13 @@ body: | ; WAVE64-LABEL: name: fcmp_ult_s16_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_NGE_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NGE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_NGE_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NGE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NGE_F16_e64_]] ; WAVE32-LABEL: name: fcmp_ult_s16_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_NGE_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NGE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_NGE_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NGE_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NGE_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -366,13 +366,13 @@ body: | ; WAVE64-LABEL: name: fcmp_ule_s16_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_NGT_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NGT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_NGT_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NGT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NGT_F16_e64_]] ; WAVE32-LABEL: name: fcmp_ule_s16_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_NGT_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NGT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_NGT_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NGT_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NGT_F16_e64_]] 
%0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -393,13 +393,13 @@ body: | ; WAVE64-LABEL: name: fcmp_une_s16_vv ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE64: [[V_CMP_NEQ_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NEQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE64: [[V_CMP_NEQ_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_NEQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NEQ_F16_e64_]] ; WAVE32-LABEL: name: fcmp_une_s16_vv ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; WAVE32: [[V_CMP_NEQ_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NEQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $exec + ; WAVE32: [[V_CMP_NEQ_F16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NEQ_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, implicit $mode, implicit $exec ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NEQ_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fexp2.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fexp2.mir index bb2ba24d411c50..a0339fa9551e2d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fexp2.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fexp2.mir @@ -14,7 +14,7 @@ body: | ; CHECK-LABEL: name: fexp2_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[V_EXP_F32_e64_:%[0-9]+]]:vgpr_32 = V_EXP_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_EXP_F32_e64_:%[0-9]+]]:vgpr_32 = V_EXP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_EXP_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_FEXP2 %0 @@ -34,7 +34,7 @@ body: | ; CHECK-LABEL: name: fexp2_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: [[V_EXP_F32_e64_:%[0-9]+]]:vgpr_32 = V_EXP_F32_e64 0, 
[[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_EXP_F32_e64_:%[0-9]+]]:vgpr_32 = V_EXP_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_EXP_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FEXP2 %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir index fa462ac93b06b9..68bde4c25b64de 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir @@ -38,7 +38,7 @@ body: | ; VI-LABEL: name: ffloor_s16_vv ; VI: liveins: $vgpr0 ; VI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI: [[V_FLOOR_F16_e64_:%[0-9]+]]:vgpr_32 = V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $exec + ; VI: [[V_FLOOR_F16_e64_:%[0-9]+]]:vgpr_32 = V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; VI: $vgpr0 = COPY [[V_FLOOR_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 @@ -60,7 +60,7 @@ body: | ; VI-LABEL: name: ffloor_s16_vs ; VI: liveins: $sgpr0 ; VI: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; VI: [[V_FLOOR_F16_e64_:%[0-9]+]]:vgpr_32 = V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $exec + ; VI: [[V_FLOOR_F16_e64_:%[0-9]+]]:vgpr_32 = V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; VI: $vgpr0 = COPY [[V_FLOOR_F16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 @@ -90,7 +90,7 @@ body: | ; VI-LABEL: name: ffloor_fneg_s16_vv ; VI: liveins: $vgpr0 ; VI: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; VI: [[V_FLOOR_F16_e64_:%[0-9]+]]:vgpr_32 = V_FLOOR_F16_e64 1, [[COPY]], 0, 0, implicit $exec + ; VI: [[V_FLOOR_F16_e64_:%[0-9]+]]:vgpr_32 = V_FLOOR_F16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec ; VI: $vgpr0 = COPY [[V_FLOOR_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s32.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s32.mir index 611eab6bfa6924..710a7927acd29c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s32.mir @@ -14,7 +14,7 @@ body: | ; CHECK-LABEL: name: ffloor_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = V_FLOOR_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = V_FLOOR_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: $vgpr0 = COPY [[V_FLOOR_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FFLOOR %0 @@ -34,7 +34,7 @@ body: | ; CHECK-LABEL: name: ffloor_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = V_FLOOR_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = V_FLOOR_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: $vgpr0 = COPY [[V_FLOOR_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_FFLOOR %0 @@ -54,7 +54,7 @@ body: | ; CHECK-LABEL: name: ffloor_fneg_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = V_FLOOR_F32_e64 1, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = V_FLOOR_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: $vgpr0 = COPY [[V_FLOOR_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = G_FNEG %0 @@ -74,7 +74,7 @@ body: | ; CHECK-LABEL: name: ffloor_fneg_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = V_FLOOR_F32_e64 1, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_FLOOR_F32_e64_:%[0-9]+]]:vgpr_32 = V_FLOOR_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: $vgpr0 = COPY [[V_FLOOR_F32_e64_]] 
%0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s64.mir index 1af481c27a97d9..276a1ffb9930db 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s64.mir @@ -14,7 +14,7 @@ body: | ; CHECK-LABEL: name: ffloor_s64_vv ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK: [[V_FLOOR_F64_e64_:%[0-9]+]]:vreg_64 = V_FLOOR_F64_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_FLOOR_F64_e64_:%[0-9]+]]:vreg_64 = V_FLOOR_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: $vgpr0_vgpr1 = COPY [[V_FLOOR_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FFLOOR %0 @@ -50,7 +50,7 @@ body: | ; CHECK-LABEL: name: ffloor_fneg_s64_vv ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK: [[V_FLOOR_F64_e64_:%[0-9]+]]:vreg_64 = V_FLOOR_F64_e64 1, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_FLOOR_F64_e64_:%[0-9]+]]:vreg_64 = V_FLOOR_F64_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: $vgpr0_vgpr1 = COPY [[V_FLOOR_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FNEG %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir index c812fc48f13541..2034eb73fdf060 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fma.s32.mir @@ -17,20 +17,20 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX6: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 
0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_FMA_F32_]] ; GFX9-DL-LABEL: name: fma_f32 ; GFX9-DL: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-DL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-DL: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX9-DL: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX9-DL: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]] ; GFX10-LABEL: name: fma_f32 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX10: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -54,20 +54,20 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX6: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_FMA_F32_]] ; GFX9-DL-LABEL: name: fma_f32_fneg_src0 ; GFX9-DL: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-DL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-DL: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX9-DL: 
[[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX9-DL: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]] ; GFX10-LABEL: name: fma_f32_fneg_src0 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX10: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -92,20 +92,20 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX6: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_FMA_F32_]] ; GFX9-DL-LABEL: name: fma_f32_fneg_src1 ; GFX9-DL: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-DL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-DL: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX9-DL: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX9-DL: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]] ; GFX10-LABEL: name: fma_f32_fneg_src1 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: 
[[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX10: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e64 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10: S_ENDPGM 0, implicit [[V_FMAC_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -130,20 +130,20 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $exec + ; GFX6: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_FMA_F32_]] ; GFX9-DL-LABEL: name: fma_f32_fneg_src2 ; GFX9-DL: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-DL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-DL: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $exec + ; GFX9-DL: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX9-DL: S_ENDPGM 0, implicit [[V_FMA_F32_]] ; GFX10-LABEL: name: fma_f32_fneg_src2 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $exec + ; GFX10: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10: S_ENDPGM 0, implicit [[V_FMA_F32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -168,20 +168,20 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY 
$vgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $exec + ; GFX6: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_FMA_F32_]] ; GFX9-DL-LABEL: name: fma_f32_fabs_src2 ; GFX9-DL: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-DL: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-DL: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $exec + ; GFX9-DL: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX9-DL: S_ENDPGM 0, implicit [[V_FMA_F32_]] ; GFX10-LABEL: name: fma_f32_fabs_src2 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $exec + ; GFX10: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10: S_ENDPGM 0, implicit [[V_FMA_F32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -206,20 +206,20 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $exec + ; GFX6: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_FMA_F32_]] ; GFX9-DL-LABEL: name: fma_f32_copy_fneg_src2 ; GFX9-DL: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-DL: [[COPY1:%[0-9]+]]:vgpr_32 = 
COPY $vgpr1 ; GFX9-DL: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-DL: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $exec + ; GFX9-DL: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX9-DL: S_ENDPGM 0, implicit [[V_FMA_F32_]] ; GFX10-LABEL: name: fma_f32_copy_fneg_src2 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $exec + ; GFX10: [[V_FMA_F32_:%[0-9]+]]:vgpr_32 = V_FMA_F32 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10: S_ENDPGM 0, implicit [[V_FMA_F32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmad.s32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmad.s32.mir index 019bcd5cf2f3cc..99e776b5d0ff63 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmad.s32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmad.s32.mir @@ -16,14 +16,14 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX6: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] ; GFX10-LABEL: name: fmad_f32 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, 
[[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX10: [[V_MAC_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10: S_ENDPGM 0, implicit [[V_MAC_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -47,14 +47,14 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX6: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_MAD_F32_]] ; GFX10-LABEL: name: fmad_f32_fneg_src0 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX10: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 1, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10: S_ENDPGM 0, implicit [[V_MAD_F32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -79,14 +79,14 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX6: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_MAD_F32_]] ; GFX10-LABEL: name: fmad_f32_fneg_src1 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY 
$vgpr2 - ; GFX10: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX10: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 1, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10: S_ENDPGM 0, implicit [[V_MAD_F32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -111,14 +111,14 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $exec + ; GFX6: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_MAD_F32_]] ; GFX10-LABEL: name: fmad_f32_fneg_src2 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $exec + ; GFX10: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10: S_ENDPGM 0, implicit [[V_MAD_F32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -143,14 +143,14 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $exec + ; GFX6: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_MAD_F32_]] ; GFX10-LABEL: name: fmad_f32_fabs_src2 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 
= COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $exec + ; GFX10: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 2, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10: S_ENDPGM 0, implicit [[V_MAD_F32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -175,14 +175,14 @@ body: | ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $exec + ; GFX6: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX6: S_ENDPGM 0, implicit [[V_MAD_F32_]] ; GFX10-LABEL: name: fmad_f32_copy_fneg_src2 ; GFX10: $vcc_hi = IMPLICIT_DEF ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $exec + ; GFX10: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[COPY]], 0, [[COPY1]], 1, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX10: S_ENDPGM 0, implicit [[V_MAD_F32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir index 636b1d2dda6949..720f9285961a2b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.mir @@ -21,15 +21,15 @@ body: | ; GFX7: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7: 
[[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec - ; GFX7: [[V_MAX_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec - ; GFX7: [[V_MAX_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX7: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MAX_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MAX_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX7: FLAT_STORE_DWORD [[COPY3]], [[V_MAX_F32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; GFX7: FLAT_STORE_DWORD [[COPY3]], [[V_MAX_F32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; GFX7: FLAT_STORE_DWORD [[COPY3]], [[V_MAX_F32_e64_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GFX7: [[V_MAX_F64_:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $exec - ; GFX7: [[V_MAX_F64_1:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $exec - ; GFX7: [[V_MAX_F64_2:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $exec + ; GFX7: [[V_MAX_F64_:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MAX_F64_1:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MAX_F64_2:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec ; GFX7: S_ENDPGM 0, implicit [[V_MAX_F64_]], implicit [[V_MAX_F64_1]], implicit [[V_MAX_F64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -89,15 +89,15 @@ body: | ; GFX7: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 ; GFX7: 
[[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec - ; GFX7: [[V_MAX_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec - ; GFX7: [[V_MAX_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX7: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MAX_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MAX_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX7: FLAT_STORE_DWORD [[COPY3]], [[V_MAX_F32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; GFX7: FLAT_STORE_DWORD [[COPY3]], [[V_MAX_F32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; GFX7: FLAT_STORE_DWORD [[COPY3]], [[V_MAX_F32_e64_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GFX7: [[V_MAX_F64_:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $exec - ; GFX7: [[V_MAX_F64_1:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $exec - ; GFX7: [[V_MAX_F64_2:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $exec + ; GFX7: [[V_MAX_F64_:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MAX_F64_1:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MAX_F64_2:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec ; GFX7: S_ENDPGM 0, implicit [[V_MAX_F64_]], implicit [[V_MAX_F64_1]], implicit [[V_MAX_F64_2]] %0:sgpr(s32) = COPY $sgpr0 
%1:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir index 32ef48fcf4daa9..e94ab1c3cdc56b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.s16.mir @@ -14,7 +14,7 @@ body: | ; CHECK-LABEL: name: fmaxnum_ieee_f16_vv ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; CHECK: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -36,7 +36,7 @@ body: | ; CHECK-LABEL: name: fmaxnum_ieee_f16_v_fneg_v ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $exec + ; CHECK: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.v2s16.mir index 3028a1f1493fe9..bc2e53d421c2c5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum-ieee.v2s16.mir @@ -13,7 +13,7 @@ body: | ; GFX9-LABEL: name: fmaxnum_ieee_v2f16_vv ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 
0, 0, implicit $exec + ; GFX9: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec ; GFX9: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir index 020e171d3fd575..a440e801682f33 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.mir @@ -22,15 +22,15 @@ body: | ; GFX7: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec - ; GFX7: [[V_MAX_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec - ; GFX7: [[V_MAX_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX7: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MAX_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MAX_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX7: FLAT_STORE_DWORD [[COPY3]], [[V_MAX_F32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; GFX7: FLAT_STORE_DWORD [[COPY3]], [[V_MAX_F32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; GFX7: FLAT_STORE_DWORD [[COPY3]], [[V_MAX_F32_e64_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GFX7: [[V_MAX_F64_:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $exec - ; GFX7: 
[[V_MAX_F64_1:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $exec - ; GFX7: [[V_MAX_F64_2:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $exec + ; GFX7: [[V_MAX_F64_:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MAX_F64_1:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MAX_F64_2:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec ; GFX7: S_ENDPGM 0, implicit [[V_MAX_F64_]], implicit [[V_MAX_F64_1]], implicit [[V_MAX_F64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -88,15 +88,15 @@ body: | ; GFX7: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec - ; GFX7: [[V_MAX_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec - ; GFX7: [[V_MAX_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX7: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MAX_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MAX_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX7: FLAT_STORE_DWORD [[COPY3]], [[V_MAX_F32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; GFX7: FLAT_STORE_DWORD [[COPY3]], [[V_MAX_F32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; GFX7: FLAT_STORE_DWORD [[COPY3]], [[V_MAX_F32_e64_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; 
GFX7: [[V_MAX_F64_:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $exec - ; GFX7: [[V_MAX_F64_1:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $exec - ; GFX7: [[V_MAX_F64_2:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $exec + ; GFX7: [[V_MAX_F64_:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MAX_F64_1:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MAX_F64_2:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec ; GFX7: S_ENDPGM 0, implicit [[V_MAX_F64_]], implicit [[V_MAX_F64_1]], implicit [[V_MAX_F64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir index e1caa4cce7e87c..1bf0c576adfebd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.s16.mir @@ -14,7 +14,7 @@ body: | ; CHECK-LABEL: name: fmaxnum_f16_vv ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; CHECK: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -36,7 +36,7 @@ body: | ; CHECK-LABEL: name: fmaxnum_f16_v_fneg_v ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $exec + ; CHECK: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F16_e64 0, [[COPY]], 1, [[COPY1]], 
0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.v2s16.mir index 0b3b1a9ff9d649..bc83f90c8a1133 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmaxnum.v2s16.mir @@ -14,7 +14,7 @@ body: | ; GFX9-LABEL: name: fmaxnum_v2f16_vv ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec ; GFX9: S_ENDPGM 0, implicit [[V_PK_MAX_F16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir index d6ac32e4154349..40b97460b20319 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.mir @@ -21,15 +21,15 @@ body: | ; GFX7: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7: [[V_MIN_F32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec - ; GFX7: [[V_MIN_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec - ; GFX7: [[V_MIN_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX7: [[V_MIN_F32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7: 
[[V_MIN_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MIN_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX7: FLAT_STORE_DWORD [[COPY3]], [[V_MIN_F32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; GFX7: FLAT_STORE_DWORD [[COPY3]], [[V_MIN_F32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; GFX7: FLAT_STORE_DWORD [[COPY3]], [[V_MIN_F32_e64_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GFX7: [[V_MIN_F64_:%[0-9]+]]:vreg_64 = V_MIN_F64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $exec - ; GFX7: [[V_MIN_F64_1:%[0-9]+]]:vreg_64 = V_MIN_F64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $exec - ; GFX7: [[V_MIN_F64_2:%[0-9]+]]:vreg_64 = V_MIN_F64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $exec + ; GFX7: [[V_MIN_F64_:%[0-9]+]]:vreg_64 = V_MIN_F64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MIN_F64_1:%[0-9]+]]:vreg_64 = V_MIN_F64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MIN_F64_2:%[0-9]+]]:vreg_64 = V_MIN_F64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec ; GFX7: S_ENDPGM 0, implicit [[V_MIN_F64_]], implicit [[V_MIN_F64_1]], implicit [[V_MIN_F64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -89,15 +89,15 @@ body: | ; GFX7: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7: [[V_MIN_F32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec - ; GFX7: [[V_MIN_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec - ; GFX7: [[V_MIN_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX7: 
[[V_MIN_F32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MIN_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MIN_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX7: FLAT_STORE_DWORD [[COPY3]], [[V_MIN_F32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; GFX7: FLAT_STORE_DWORD [[COPY3]], [[V_MIN_F32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; GFX7: FLAT_STORE_DWORD [[COPY3]], [[V_MIN_F32_e64_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GFX7: [[V_MIN_F64_:%[0-9]+]]:vreg_64 = V_MIN_F64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $exec - ; GFX7: [[V_MIN_F64_1:%[0-9]+]]:vreg_64 = V_MIN_F64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $exec - ; GFX7: [[V_MIN_F64_2:%[0-9]+]]:vreg_64 = V_MIN_F64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $exec + ; GFX7: [[V_MIN_F64_:%[0-9]+]]:vreg_64 = V_MIN_F64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MIN_F64_1:%[0-9]+]]:vreg_64 = V_MIN_F64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MIN_F64_2:%[0-9]+]]:vreg_64 = V_MIN_F64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec ; GFX7: S_ENDPGM 0, implicit [[V_MIN_F64_]], implicit [[V_MIN_F64_1]], implicit [[V_MIN_F64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir index 432243ec9c9c2b..cf00b1b1d80a41 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.s16.mir @@ -14,7 +14,7 @@ body: | ; CHECK-LABEL: name: fminnum_ieee_f16_vv ; CHECK: 
[[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; CHECK: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_MIN_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -36,7 +36,7 @@ body: | ; CHECK-LABEL: name: fminnum_ieee_f16_v_fneg_v ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $exec + ; CHECK: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_MIN_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.v2s16.mir index 13853bf90c5c06..0bb68ef86ed98f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum-ieee.v2s16.mir @@ -13,7 +13,7 @@ body: | ; GFX9-LABEL: name: fminnum_ieee_v2f16_vv ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: [[V_PK_MIN_F16_:%[0-9]+]]:vgpr_32 = V_PK_MIN_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9: [[V_PK_MIN_F16_:%[0-9]+]]:vgpr_32 = V_PK_MIN_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec ; GFX9: S_ENDPGM 0, implicit [[V_PK_MIN_F16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir index 1f4decb7826a48..74350b247fc486 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.mir @@ -22,15 +22,15 @@ body: | ; GFX7: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7: [[V_MIN_F32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec - ; GFX7: [[V_MIN_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec - ; GFX7: [[V_MIN_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX7: [[V_MIN_F32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MIN_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MIN_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX7: FLAT_STORE_DWORD [[COPY3]], [[V_MIN_F32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; GFX7: FLAT_STORE_DWORD [[COPY3]], [[V_MIN_F32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; GFX7: FLAT_STORE_DWORD [[COPY3]], [[V_MIN_F32_e64_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GFX7: [[V_MIN_F64_:%[0-9]+]]:vreg_64 = V_MIN_F64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $exec - ; GFX7: [[V_MIN_F64_1:%[0-9]+]]:vreg_64 = V_MIN_F64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $exec - ; GFX7: [[V_MIN_F64_2:%[0-9]+]]:vreg_64 = V_MIN_F64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $exec + ; GFX7: [[V_MIN_F64_:%[0-9]+]]:vreg_64 = V_MIN_F64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MIN_F64_1:%[0-9]+]]:vreg_64 = V_MIN_F64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7: 
[[V_MIN_F64_2:%[0-9]+]]:vreg_64 = V_MIN_F64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec ; GFX7: S_ENDPGM 0, implicit [[V_MIN_F64_]], implicit [[V_MIN_F64_1]], implicit [[V_MIN_F64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -88,15 +88,15 @@ body: | ; GFX7: [[COPY4:%[0-9]+]]:sreg_64 = COPY $sgpr10_sgpr11 ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 - ; GFX7: [[V_MIN_F32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec - ; GFX7: [[V_MIN_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec - ; GFX7: [[V_MIN_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GFX7: [[V_MIN_F32_e64_:%[0-9]+]]:vgpr_32 = V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MIN_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MIN_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GFX7: FLAT_STORE_DWORD [[COPY3]], [[V_MIN_F32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; GFX7: FLAT_STORE_DWORD [[COPY3]], [[V_MIN_F32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; GFX7: FLAT_STORE_DWORD [[COPY3]], [[V_MIN_F32_e64_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) - ; GFX7: [[V_MIN_F64_:%[0-9]+]]:vreg_64 = V_MIN_F64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $exec - ; GFX7: [[V_MIN_F64_1:%[0-9]+]]:vreg_64 = V_MIN_F64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $exec - ; GFX7: [[V_MIN_F64_2:%[0-9]+]]:vreg_64 = V_MIN_F64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $exec + ; GFX7: [[V_MIN_F64_:%[0-9]+]]:vreg_64 = V_MIN_F64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec + ; GFX7: 
[[V_MIN_F64_1:%[0-9]+]]:vreg_64 = V_MIN_F64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec + ; GFX7: [[V_MIN_F64_2:%[0-9]+]]:vreg_64 = V_MIN_F64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec ; GFX7: S_ENDPGM 0, implicit [[V_MIN_F64_]], implicit [[V_MIN_F64_1]], implicit [[V_MIN_F64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir index 71d7b6e8c6df42..0a4f65544a4670 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.s16.mir @@ -14,7 +14,7 @@ body: | ; CHECK-LABEL: name: fminnum_f16_vv ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; CHECK: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_MIN_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 @@ -36,7 +36,7 @@ body: | ; CHECK-LABEL: name: fminnum_f16_v_fneg_v ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; CHECK: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $exec + ; CHECK: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_F16_e64 0, [[COPY]], 1, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; CHECK: S_ENDPGM 0, implicit [[V_MIN_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.v2s16.mir index 84afe51ca3cfca..255d05d39f004e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.v2s16.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fminnum.v2s16.mir @@ -13,7 +13,7 @@ body: | ; GFX9-LABEL: name: fminnum_v2f16_vv ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: [[V_PK_MIN_F16_:%[0-9]+]]:vgpr_32 = V_PK_MIN_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9: [[V_PK_MIN_F16_:%[0-9]+]]:vgpr_32 = V_PK_MIN_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec ; GFX9: S_ENDPGM 0, implicit [[V_PK_MIN_F16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir index c7dbeada2ca42e..babbe653b98003 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir @@ -15,9 +15,9 @@ body: | ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GCN: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec - ; GCN: [[V_MUL_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec - ; GCN: [[V_MUL_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GCN: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN: [[V_MUL_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN: [[V_MUL_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN: FLAT_STORE_DWORD [[COPY3]], [[V_MUL_F32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; GCN: FLAT_STORE_DWORD [[COPY3]], [[V_MUL_F32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: 
(store 4, addrspace 1) ; GCN: FLAT_STORE_DWORD [[COPY3]], [[V_MUL_F32_e64_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) @@ -53,9 +53,9 @@ body: | ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; GCN: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GCN: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GCN: [[V_MUL_F64_:%[0-9]+]]:vreg_64 = V_MUL_F64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec - ; GCN: [[V_MUL_F64_1:%[0-9]+]]:vreg_64 = V_MUL_F64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec - ; GCN: [[V_MUL_F64_2:%[0-9]+]]:vreg_64 = V_MUL_F64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GCN: [[V_MUL_F64_:%[0-9]+]]:vreg_64 = V_MUL_F64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN: [[V_MUL_F64_1:%[0-9]+]]:vreg_64 = V_MUL_F64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN: [[V_MUL_F64_2:%[0-9]+]]:vreg_64 = V_MUL_F64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN: S_ENDPGM 0, implicit [[V_MUL_F64_]], implicit [[V_MUL_F64_1]], implicit [[V_MUL_F64_2]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = COPY $vgpr0_vgpr1 @@ -86,9 +86,9 @@ body: | ; GCN-LABEL: name: fmul_f16 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_MUL_F16_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec - ; GCN: [[V_MUL_F16_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec - ; GCN: [[V_MUL_F16_e64_2:%[0-9]+]]:vgpr_32 = V_MUL_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GCN: [[V_MUL_F16_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN: [[V_MUL_F16_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN: [[V_MUL_F16_e64_2:%[0-9]+]]:vgpr_32 = V_MUL_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; 
GCN: S_ENDPGM 0, implicit [[V_MUL_F16_e64_]], implicit [[V_MUL_F16_e64_1]], implicit [[V_MUL_F16_e64_2]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = COPY $vgpr0 @@ -123,16 +123,16 @@ body: | ; GCN-LABEL: name: fmul_modifiers_f32 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GCN: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 2, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec - ; GCN: [[V_MUL_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, [[COPY]], 2, [[COPY]], 0, 0, implicit $exec - ; GCN: [[V_MUL_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $exec - ; GCN: [[V_MUL_F32_e64_3:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 1, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec - ; GCN: [[V_MUL_F32_e64_4:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, [[COPY]], 1, [[COPY]], 0, 0, implicit $exec - ; GCN: [[V_MUL_F32_e64_5:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $exec - ; GCN: [[V_MUL_F32_e64_6:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 3, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec - ; GCN: [[V_MUL_F32_e64_7:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, [[COPY]], 3, [[COPY]], 0, 0, implicit $exec - ; GCN: [[V_MUL_F32_e64_8:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 3, [[COPY]], 3, [[COPY]], 0, 0, implicit $exec - ; GCN: [[V_MUL_F32_e64_9:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 3, [[COPY]], 1, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 2, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN: [[V_MUL_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN: [[V_MUL_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN: [[V_MUL_F32_e64_3:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 1, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN: [[V_MUL_F32_e64_4:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, [[COPY]], 1, [[COPY]], 0, 0, 
implicit $mode, implicit $exec + ; GCN: [[V_MUL_F32_e64_5:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN: [[V_MUL_F32_e64_6:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 3, [[COPY]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN: [[V_MUL_F32_e64_7:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN: [[V_MUL_F32_e64_8:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 3, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN: [[V_MUL_F32_e64_9:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 3, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; GCN: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; GCN: FLAT_STORE_DWORD [[COPY1]], [[V_MUL_F32_e64_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.v2s16.mir index 665a3589831d2d..b70c8e25ccd1e8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.v2s16.mir @@ -13,7 +13,7 @@ body: | ; GFX9-LABEL: name: fmul_v2f16_vv ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = V_PK_MUL_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = V_PK_MUL_F16 8, [[COPY]], 8, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec ; GFX9: S_ENDPGM 0, implicit [[V_PK_MUL_F16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 @@ -33,7 +33,7 @@ body: | ; GFX9-LABEL: name: fmul_v2f16_fneg_v_fneg_v ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; 
GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = V_PK_MUL_F16 11, [[COPY]], 11, [[COPY1]], 0, 0, 0, 0, 0, implicit $exec + ; GFX9: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = V_PK_MUL_F16 11, [[COPY]], 11, [[COPY1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec ; GFX9: S_ENDPGM 0, implicit [[V_PK_MUL_F16_]] %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(<2 x s16>) = COPY $vgpr1 @@ -60,7 +60,7 @@ body: | ; GFX9: [[FNEG:%[0-9]+]]:vgpr(s16) = G_FNEG [[TRUNC]] ; GFX9: [[ANYEXT:%[0-9]+]]:vgpr(s32) = G_ANYEXT [[FNEG]](s16) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:vgpr_32(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[COPY2]](s32) - ; GFX9: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_PK_MUL_F16 8, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 8, [[COPY]](<2 x s16>), 0, 0, 0, 0, 0, implicit $exec + ; GFX9: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32(<2 x s16>) = V_PK_MUL_F16 8, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 8, [[COPY]](<2 x s16>), 0, 0, 0, 0, 0, implicit $mode, implicit $exec ; GFX9: S_ENDPGM 0, implicit [[V_PK_MUL_F16_]](<2 x s16>) %0:vgpr(<2 x s16>) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir index e1e8c0e250be2e..64662d748cd175 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir @@ -14,7 +14,7 @@ body: | ; GCN-LABEL: name: fptosi_s32_to_s32_vv ; GCN: liveins: $vgpr0 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN: $vgpr0 = COPY [[V_CVT_I32_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FPTOSI %0 @@ -34,7 +34,7 @@ body: | ; GCN-LABEL: name: fptosi_s32_to_s32_vs ; GCN: liveins: $sgpr0 ; GCN: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_I32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN: $vgpr0 = COPY [[V_CVT_I32_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_FPTOSI %0 @@ -54,7 +54,7 @@ body: | ; GCN-LABEL: name: fptosi_s32_to_s32_fneg_vv ; GCN: liveins: $vgpr0 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_CVT_I32_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_I32_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN: $vgpr0 = COPY [[V_CVT_I32_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 @@ -75,8 +75,8 @@ body: | ; GCN-LABEL: name: fptosi_s16_to_s32_vv ; GCN: liveins: $vgpr0 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_CVT_F32_F16_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_e32 [[COPY]], implicit $exec - ; GCN: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e32_]], implicit $exec + ; GCN: [[V_CVT_F32_F16_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; GCN: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e32_]], implicit $mode, implicit $exec ; GCN: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 @@ -97,8 +97,8 @@ body: | ; GCN-LABEL: name: fptosi_s16_to_s32_vs ; GCN: liveins: $sgpr0 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[V_CVT_F32_F16_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_e32 [[COPY]], implicit $exec - ; GCN: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e32_]], implicit $exec + ; GCN: [[V_CVT_F32_F16_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; GCN: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_I32_F32_e32 
[[V_CVT_F32_F16_e32_]], implicit $mode, implicit $exec ; GCN: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 @@ -121,8 +121,8 @@ body: | ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GCN: [[V_CVT_F32_F16_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $exec - ; GCN: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e32_]], implicit $exec + ; GCN: [[V_CVT_F32_F16_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec + ; GCN: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_I32_F32_e32 [[V_CVT_F32_F16_e32_]], implicit $mode, implicit $exec ; GCN: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir index e6736f2d714755..a13620ad94520f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir @@ -15,8 +15,8 @@ body: | ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GCN: [[V_CVT_U32_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $exec - ; GCN: [[V_CVT_U32_F32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $exec + ; GCN: [[V_CVT_U32_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN: [[V_CVT_U32_F32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec ; GCN: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_U32_F32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; GCN: 
FLAT_STORE_DWORD [[COPY2]], [[V_CVT_U32_F32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) %0:sgpr(s32) = COPY $sgpr0 @@ -48,8 +48,8 @@ body: | ; GCN-LABEL: name: fptoui_s16_to_s32_vv ; GCN: liveins: $vgpr0 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_CVT_F32_F16_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_e32 [[COPY]], implicit $exec - ; GCN: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e32_]], implicit $exec + ; GCN: [[V_CVT_F32_F16_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; GCN: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e32_]], implicit $mode, implicit $exec ; GCN: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 @@ -70,8 +70,8 @@ body: | ; GCN-LABEL: name: fptoui_s16_to_s32_vs ; GCN: liveins: $sgpr0 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[V_CVT_F32_F16_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_e32 [[COPY]], implicit $exec - ; GCN: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e32_]], implicit $exec + ; GCN: [[V_CVT_F32_F16_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec + ; GCN: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e32_]], implicit $mode, implicit $exec ; GCN: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 @@ -94,8 +94,8 @@ body: | ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768 ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GCN: [[V_CVT_F32_F16_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $exec - ; GCN: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e32_]], implicit $exec + ; GCN: [[V_CVT_F32_F16_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_e32 
[[V_XOR_B32_e32_]], implicit $mode, implicit $exec + ; GCN: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_U32_F32_e32 [[V_CVT_F32_F16_e32_]], implicit $mode, implicit $exec ; GCN: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.mir index 45a8551ee47e3b..316046edaad471 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.mir @@ -15,7 +15,7 @@ body: | ; GCN-LABEL: name: frint_s32_vv ; GCN: liveins: $vgpr0 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_RNDNE_F32_e64_:%[0-9]+]]:vgpr_32 = V_RNDNE_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_RNDNE_F32_e64_:%[0-9]+]]:vgpr_32 = V_RNDNE_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN: $vgpr0 = COPY [[V_RNDNE_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FRINT %0 @@ -35,7 +35,7 @@ body: | ; GCN-LABEL: name: frint_s32_vs ; GCN: liveins: $sgpr0 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[V_RNDNE_F32_e64_:%[0-9]+]]:vgpr_32 = V_RNDNE_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_RNDNE_F32_e64_:%[0-9]+]]:vgpr_32 = V_RNDNE_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN: $vgpr0 = COPY [[V_RNDNE_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_FRINT %0 @@ -55,7 +55,7 @@ body: | ; GCN-LABEL: name: frint_fneg_s32_vv ; GCN: liveins: $vgpr0 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_RNDNE_F32_e64_:%[0-9]+]]:vgpr_32 = V_RNDNE_F32_e64 1, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_RNDNE_F32_e64_:%[0-9]+]]:vgpr_32 = V_RNDNE_F32_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN: $vgpr0 = COPY [[V_RNDNE_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 @@ -76,7 +76,7 @@ body: | ; GCN-LABEL: name: frint_s64_vv ; GCN: liveins: $vgpr0_vgpr1 ; GCN: [[COPY:%[0-9]+]]:vreg_64 
= COPY $vgpr0_vgpr1 - ; GCN: [[V_RNDNE_F64_e64_:%[0-9]+]]:vreg_64 = V_RNDNE_F64_e64 0, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_RNDNE_F64_e64_:%[0-9]+]]:vreg_64 = V_RNDNE_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN: $vgpr0_vgpr1 = COPY [[V_RNDNE_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FRINT %0 @@ -96,7 +96,7 @@ body: | ; GCN-LABEL: name: frint_s64_fneg_vv ; GCN: liveins: $vgpr0_vgpr1 ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GCN: [[V_RNDNE_F64_e64_:%[0-9]+]]:vreg_64 = V_RNDNE_F64_e64 1, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_RNDNE_F64_e64_:%[0-9]+]]:vreg_64 = V_RNDNE_F64_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN: $vgpr0_vgpr1 = COPY [[V_RNDNE_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FNEG %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.s16.mir index c72ea740a3986e..e449a7b93baa0d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.s16.mir @@ -38,7 +38,7 @@ body: | ; GCN-LABEL: name: frint_s16_vv ; GCN: liveins: $vgpr0 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_RNDNE_F16_e64_:%[0-9]+]]:vgpr_32 = V_RNDNE_F16_e64 0, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_RNDNE_F16_e64_:%[0-9]+]]:vgpr_32 = V_RNDNE_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN: $vgpr0 = COPY [[V_RNDNE_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 @@ -60,7 +60,7 @@ body: | ; GCN-LABEL: name: frint_s16_vs ; GCN: liveins: $sgpr0 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[V_RNDNE_F16_e64_:%[0-9]+]]:vgpr_32 = V_RNDNE_F16_e64 0, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_RNDNE_F16_e64_:%[0-9]+]]:vgpr_32 = V_RNDNE_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN: $vgpr0 = COPY [[V_RNDNE_F16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 @@ -82,7 
+82,7 @@ body: | ; GCN-LABEL: name: frint_fneg_s16_vv ; GCN: liveins: $vgpr0 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_RNDNE_F16_e64_:%[0-9]+]]:vgpr_32 = V_RNDNE_F16_e64 1, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_RNDNE_F16_e64_:%[0-9]+]]:vgpr_32 = V_RNDNE_F16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN: $vgpr0 = COPY [[V_RNDNE_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.mir index 550f47c5471a55..a9cd8c51f62a63 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.mir @@ -14,7 +14,7 @@ body: | ; CHECK-LABEL: name: intrinsic_trunc_s32_vv ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: [[V_TRUNC_F32_e64_:%[0-9]+]]:vgpr_32 = V_TRUNC_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_TRUNC_F32_e64_:%[0-9]+]]:vgpr_32 = V_TRUNC_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: $vgpr0 = COPY [[V_TRUNC_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_INTRINSIC_TRUNC %0 @@ -34,7 +34,7 @@ body: | ; CHECK-LABEL: name: intrinsic_trunc_s32_vs ; CHECK: liveins: $sgpr0 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[V_TRUNC_F32_e64_:%[0-9]+]]:vgpr_32 = V_TRUNC_F32_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_TRUNC_F32_e64_:%[0-9]+]]:vgpr_32 = V_TRUNC_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: $vgpr0 = COPY [[V_TRUNC_F32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_INTRINSIC_TRUNC %0 @@ -54,7 +54,7 @@ body: | ; CHECK-LABEL: name: intrinsic_trunc_s64_sv ; CHECK: liveins: $sgpr0_sgpr1 ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[V_TRUNC_F64_e64_:%[0-9]+]]:vreg_64 = V_TRUNC_F64_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: 
[[V_TRUNC_F64_e64_:%[0-9]+]]:vreg_64 = V_TRUNC_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: $vgpr0_vgpr1 = COPY [[V_TRUNC_F64_e64_]] %0:sgpr(s64) = COPY $sgpr0_sgpr1 %1:vgpr(s64) = G_INTRINSIC_TRUNC %0 @@ -74,7 +74,7 @@ body: | ; CHECK-LABEL: name: intrinsic_trunc_s64_vv ; CHECK: liveins: $vgpr0_vgpr1 ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK: [[V_TRUNC_F64_e64_:%[0-9]+]]:vreg_64 = V_TRUNC_F64_e64 0, [[COPY]], 0, 0, implicit $exec + ; CHECK: [[V_TRUNC_F64_e64_:%[0-9]+]]:vreg_64 = V_TRUNC_F64_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; CHECK: $vgpr0_vgpr1 = COPY [[V_TRUNC_F64_e64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_INTRINSIC_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.s16.mir index 1bf97cac9602aa..d2fb035c8b6b2e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.s16.mir @@ -14,7 +14,7 @@ body: | ; GCN-LABEL: name: intrinsic_trunc_s16_vv ; GCN: liveins: $vgpr0 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_TRUNC_F16_e64_:%[0-9]+]]:vgpr_32 = V_TRUNC_F16_e64 0, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_TRUNC_F16_e64_:%[0-9]+]]:vgpr_32 = V_TRUNC_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN: $vgpr0 = COPY [[V_TRUNC_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 @@ -36,7 +36,7 @@ body: | ; GCN-LABEL: name: intrinsic_trunc_s16_vs ; GCN: liveins: $sgpr0 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[V_TRUNC_F16_e64_:%[0-9]+]]:vgpr_32 = V_TRUNC_F16_e64 0, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_TRUNC_F16_e64_:%[0-9]+]]:vgpr_32 = V_TRUNC_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN: $vgpr0 = COPY [[V_TRUNC_F16_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 @@ -58,7 +58,7 @@ body: | ; GCN-LABEL: 
name: intrinsic_trunc_fneg_s16_vv ; GCN: liveins: $vgpr0 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_TRUNC_F16_e64_:%[0-9]+]]:vgpr_32 = V_TRUNC_F16_e64 1, [[COPY]], 0, 0, implicit $exec + ; GCN: [[V_TRUNC_F16_e64_:%[0-9]+]]:vgpr_32 = V_TRUNC_F16_e64 1, [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN: $vgpr0 = COPY [[V_TRUNC_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir index e68fda19d493b2..3cd2362b10934b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir @@ -16,8 +16,8 @@ body: | ; WAVE64: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE64: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; WAVE64: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY]], 0, 0, implicit $exec - ; WAVE64: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY1]], 0, 0, implicit $exec + ; WAVE64: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec + ; WAVE64: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY1]], 0, 0, implicit $mode, implicit $exec ; WAVE64: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; WAVE64: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; WAVE32-LABEL: name: sitofp @@ -25,8 +25,8 @@ body: | ; WAVE32: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; WAVE32: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; WAVE32: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY]], 0, 0, implicit $exec - ; WAVE32: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 
[[COPY1]], 0, 0, implicit $exec + ; WAVE32: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec + ; WAVE32: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY1]], 0, 0, implicit $mode, implicit $exec ; WAVE32: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; WAVE32: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) %0:sgpr(s32) = COPY $sgpr0 @@ -58,15 +58,15 @@ body: | ; WAVE64-LABEL: name: sitofp_s32_to_s16_vv ; WAVE64: liveins: $vgpr0 ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $exec - ; WAVE64: [[V_CVT_F16_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F16_F32_e32 [[V_CVT_F32_I32_e32_]], implicit $exec + ; WAVE64: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec + ; WAVE64: [[V_CVT_F16_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F16_F32_e32 [[V_CVT_F32_I32_e32_]], implicit $mode, implicit $exec ; WAVE64: $vgpr0 = COPY [[V_CVT_F16_F32_e32_]] ; WAVE32-LABEL: name: sitofp_s32_to_s16_vv ; WAVE32: liveins: $vgpr0 ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $exec - ; WAVE32: [[V_CVT_F16_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F16_F32_e32 [[V_CVT_F32_I32_e32_]], implicit $exec + ; WAVE32: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec + ; WAVE32: [[V_CVT_F16_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F16_F32_e32 [[V_CVT_F32_I32_e32_]], implicit $mode, implicit $exec ; WAVE32: $vgpr0 = COPY [[V_CVT_F16_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_SITOFP %0 @@ -87,15 +87,15 @@ body: | ; WAVE64-LABEL: name: sitofp_s32_to_s16_vs ; WAVE64: liveins: $sgpr0 ; WAVE64: [[COPY:%[0-9]+]]:sreg_32 = 
COPY $sgpr0 - ; WAVE64: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $exec - ; WAVE64: [[V_CVT_F16_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F16_F32_e32 [[V_CVT_F32_I32_e32_]], implicit $exec + ; WAVE64: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec + ; WAVE64: [[V_CVT_F16_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F16_F32_e32 [[V_CVT_F32_I32_e32_]], implicit $mode, implicit $exec ; WAVE64: $vgpr0 = COPY [[V_CVT_F16_F32_e32_]] ; WAVE32-LABEL: name: sitofp_s32_to_s16_vs ; WAVE32: liveins: $sgpr0 ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $exec - ; WAVE32: [[V_CVT_F16_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F16_F32_e32 [[V_CVT_F32_I32_e32_]], implicit $exec + ; WAVE32: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec + ; WAVE32: [[V_CVT_F16_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F16_F32_e32 [[V_CVT_F32_I32_e32_]], implicit $mode, implicit $exec ; WAVE32: $vgpr0 = COPY [[V_CVT_F16_F32_e32_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s16) = G_SITOFP %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir index d35f7c428a470e..421b987f8f9220 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir @@ -15,13 +15,13 @@ body: | ; WAVE64-LABEL: name: uitofp_s32_to_s32_vv ; WAVE64: liveins: $vgpr0 ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $exec + ; WAVE64: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec ; WAVE64: $vgpr0 = COPY [[V_CVT_F32_U32_e64_]] ; WAVE32-LABEL: name: uitofp_s32_to_s32_vv ; WAVE32: liveins: $vgpr0 ; WAVE32: 
$vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $exec + ; WAVE32: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec ; WAVE32: $vgpr0 = COPY [[V_CVT_F32_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_UITOFP %0 @@ -41,13 +41,13 @@ body: | ; WAVE64-LABEL: name: uitofp_s32_to_s32_vs ; WAVE64: liveins: $sgpr0 ; WAVE64: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $exec + ; WAVE64: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec ; WAVE64: $vgpr0 = COPY [[V_CVT_F32_U32_e64_]] ; WAVE32-LABEL: name: uitofp_s32_to_s32_vs ; WAVE32: liveins: $sgpr0 ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $exec + ; WAVE32: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec ; WAVE32: $vgpr0 = COPY [[V_CVT_F32_U32_e64_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s32) = G_UITOFP %0 @@ -67,15 +67,15 @@ body: | ; WAVE64-LABEL: name: uitofp_s32_to_s16_vv ; WAVE64: liveins: $vgpr0 ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $exec - ; WAVE64: [[V_CVT_F16_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F16_F32_e32 [[V_CVT_F32_U32_e32_]], implicit $exec + ; WAVE64: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec + ; WAVE64: [[V_CVT_F16_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F16_F32_e32 [[V_CVT_F32_U32_e32_]], implicit $mode, implicit $exec ; WAVE64: $vgpr0 = COPY [[V_CVT_F16_F32_e32_]] ; WAVE32-LABEL: name: uitofp_s32_to_s16_vv ; WAVE32: liveins: $vgpr0 ; WAVE32: 
$vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $exec - ; WAVE32: [[V_CVT_F16_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F16_F32_e32 [[V_CVT_F32_U32_e32_]], implicit $exec + ; WAVE32: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec + ; WAVE32: [[V_CVT_F16_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F16_F32_e32 [[V_CVT_F32_U32_e32_]], implicit $mode, implicit $exec ; WAVE32: $vgpr0 = COPY [[V_CVT_F16_F32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_UITOFP %0 @@ -96,15 +96,15 @@ body: | ; WAVE64-LABEL: name: uitofp_s32_to_s16_vs ; WAVE64: liveins: $sgpr0 ; WAVE64: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE64: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $exec - ; WAVE64: [[V_CVT_F16_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F16_F32_e32 [[V_CVT_F32_U32_e32_]], implicit $exec + ; WAVE64: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec + ; WAVE64: [[V_CVT_F16_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F16_F32_e32 [[V_CVT_F32_U32_e32_]], implicit $mode, implicit $exec ; WAVE64: $vgpr0 = COPY [[V_CVT_F16_F32_e32_]] ; WAVE32-LABEL: name: uitofp_s32_to_s16_vs ; WAVE32: liveins: $sgpr0 ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; WAVE32: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $exec - ; WAVE32: [[V_CVT_F16_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F16_F32_e32 [[V_CVT_F32_U32_e32_]], implicit $exec + ; WAVE32: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec + ; WAVE32: [[V_CVT_F16_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F16_F32_e32 [[V_CVT_F32_U32_e32_]], implicit $mode, implicit $exec ; WAVE32: $vgpr0 = COPY [[V_CVT_F16_F32_e32_]] %0:sgpr(s32) = COPY $sgpr0 %1:vgpr(s16) = G_UITOFP %0 diff --git 
a/llvm/test/CodeGen/AMDGPU/bundle-latency.mir b/llvm/test/CodeGen/AMDGPU/bundle-latency.mir index 603d0cf33f90ca..2bb21dec55a259 100644 --- a/llvm/test/CodeGen/AMDGPU/bundle-latency.mir +++ b/llvm/test/CodeGen/AMDGPU/bundle-latency.mir @@ -13,14 +13,14 @@ body: | ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, implicit $exec ; GCN: } - ; GCN: $vgpr6 = V_ADD_F32_e32 killed $vgpr0, $vgpr0, implicit $exec - ; GCN: $vgpr5 = V_ADD_F32_e32 killed $vgpr1, $vgpr1, implicit $exec + ; GCN: $vgpr6 = V_ADD_F32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec + ; GCN: $vgpr5 = V_ADD_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec $vgpr0, $vgpr1 = BUNDLE undef $vgpr3_vgpr4, implicit $exec { $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, implicit $exec } - $vgpr5 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $exec - $vgpr6 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr5 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + $vgpr6 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec ... 
--- @@ -29,14 +29,14 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: dst_bundle_latency - ; GCN: $vgpr1 = V_ADD_F32_e32 undef $vgpr6, undef $vgpr6, implicit $exec - ; GCN: $vgpr0 = V_ADD_F32_e32 undef $vgpr5, undef $vgpr5, implicit $exec + ; GCN: $vgpr1 = V_ADD_F32_e32 undef $vgpr6, undef $vgpr6, implicit $mode, implicit $exec + ; GCN: $vgpr0 = V_ADD_F32_e32 undef $vgpr5, undef $vgpr5, implicit $mode, implicit $exec ; GCN: BUNDLE killed $vgpr0, killed $vgpr1, undef $vgpr3_vgpr4, implicit $exec { ; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr1, 0, 0, 0, 0, implicit $exec ; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr0, 4, 0, 0, 0, implicit $exec ; GCN: } - $vgpr0 = V_ADD_F32_e32 undef $vgpr5, undef $vgpr5, implicit $exec - $vgpr1 = V_ADD_F32_e32 undef $vgpr6, undef $vgpr6, implicit $exec + $vgpr0 = V_ADD_F32_e32 undef $vgpr5, undef $vgpr5, implicit $mode, implicit $exec + $vgpr1 = V_ADD_F32_e32 undef $vgpr6, undef $vgpr6, implicit $mode, implicit $exec BUNDLE $vgpr0, $vgpr1, undef $vgpr3_vgpr4, implicit $exec { GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, implicit $exec GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/clamp-omod-special-case.mir b/llvm/test/CodeGen/AMDGPU/clamp-omod-special-case.mir index f631bcd258115a..f78ad501cebfd9 100644 --- a/llvm/test/CodeGen/AMDGPU/clamp-omod-special-case.mir +++ b/llvm/test/CodeGen/AMDGPU/clamp-omod-special-case.mir @@ -1,8 +1,8 @@ # RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck -check-prefix=GCN %s --- # GCN-LABEL: name: v_max_self_clamp_not_set_f32 -# GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec -# GCN-NEXT: %21:vgpr_32 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 0, 0, implicit $exec +# GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec +# GCN-NEXT: %21:vgpr_32 = V_MAX_F32_e64 
0, killed %20, 0, killed %20, 0, 0, implicit $mode, implicit $exec name: v_max_self_clamp_not_set_f32 tracksRegLiveness: true @@ -56,16 +56,16 @@ body: | %16 = REG_SEQUENCE killed %4, 17, %12, 18 %18 = COPY %26 %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec - %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec - %21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 0, 0, implicit $exec + %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec + %21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 0, 0, implicit $mode, implicit $exec BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... --- # GCN-LABEL: name: v_clamp_omod_already_set_f32 -# GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec -# GCN: %21:vgpr_32 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 1, 3, implicit $exec +# GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec +# GCN: %21:vgpr_32 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 1, 3, implicit $mode, implicit $exec name: v_clamp_omod_already_set_f32 tracksRegLiveness: true registers: @@ -118,8 +118,8 @@ body: | %16 = REG_SEQUENCE killed %4, 17, %12, 18 %18 = COPY %26 %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec - %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec - %21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 1, 3, implicit $exec + %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec + %21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 1, 3, implicit $mode, implicit $exec BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... 
@@ -127,8 +127,8 @@ body: | # Don't fold a mul that looks like an omod if itself has omod set # GCN-LABEL: name: v_omod_mul_omod_already_set_f32 -# GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec -# GCN-NEXT: %21:vgpr_32 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 0, 3, implicit $exec +# GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec +# GCN-NEXT: %21:vgpr_32 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 0, 3, implicit $mode, implicit $exec name: v_omod_mul_omod_already_set_f32 tracksRegLiveness: true registers: @@ -181,8 +181,8 @@ body: | %16 = REG_SEQUENCE killed %4, 17, %12, 18 %18 = COPY %26 %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec - %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec - %21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 0, 3, implicit $exec + %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec + %21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 0, 3, implicit $mode, implicit $exec BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 @@ -191,8 +191,8 @@ body: | # Don't fold a mul that looks like an omod if itself has clamp set # This might be OK, but would require folding the clamp at the same time. 
# GCN-LABEL: name: v_omod_mul_clamp_already_set_f32 -# GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec -# GCN-NEXT: %21:vgpr_32 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 1, 0, implicit $exec +# GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec +# GCN-NEXT: %21:vgpr_32 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 1, 0, implicit $mode, implicit $exec name: v_omod_mul_clamp_already_set_f32 tracksRegLiveness: true @@ -246,8 +246,8 @@ body: | %16 = REG_SEQUENCE killed %4, 17, %12, 18 %18 = COPY %26 %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec - %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec - %21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 1, 0, implicit $exec + %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec + %21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 1, 0, implicit $mode, implicit $exec BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 @@ -269,8 +269,8 @@ body: | # Don't fold a mul that looks like an omod if itself has omod set # GCN-LABEL: name: v_omod_add_omod_already_set_f32 -# GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec -# GCN-NEXT: %21:vgpr_32 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 0, 3, implicit $exec +# GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec +# GCN-NEXT: %21:vgpr_32 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 0, 3, implicit $mode, implicit $exec name: v_omod_add_omod_already_set_f32 tracksRegLiveness: true registers: @@ -323,8 +323,8 @@ body: | %16 = REG_SEQUENCE killed %4, 17, %12, 18 %18 = COPY %26 %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec - %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec - %21 = V_ADD_F32_e64 0, killed %20, 0, killed 
%20, 0, 3, implicit $exec + %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec + %21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 0, 3, implicit $mode, implicit $exec BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 @@ -333,8 +333,8 @@ body: | # Don't fold a mul that looks like an omod if itself has clamp set # This might be OK, but would require folding the clamp at the same time. # GCN-LABEL: name: v_omod_add_clamp_already_set_f32 -# GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec -# GCN-NEXT: %21:vgpr_32 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 1, 0, implicit $exec +# GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec +# GCN-NEXT: %21:vgpr_32 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 1, 0, implicit $mode, implicit $exec name: v_omod_add_clamp_already_set_f32 tracksRegLiveness: true @@ -388,8 +388,8 @@ body: | %16 = REG_SEQUENCE killed %4, 17, %12, 18 %18 = COPY %26 %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec - %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec - %21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 1, 0, implicit $exec + %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec + %21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 1, 0, implicit $mode, implicit $exec BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 @@ -407,6 +407,6 @@ body: | liveins: $vgpr0 %0 = COPY $vgpr0 - %1 = V_MAX_F32_e64 0, killed %0, 0, 1056964608, 1, 0, implicit $exec + %1 = V_MAX_F32_e64 0, killed %0, 0, 1056964608, 1, 0, implicit $mode, implicit $exec ... 
diff --git a/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir b/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir index a187cd11ed9190..80a201bbfdd00c 100644 --- a/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir +++ b/llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir @@ -15,6 +15,6 @@ body: | bb.0: %0 = IMPLICIT_DEF %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) - %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $exec + %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $mode, implicit $exec %3 = FLAT_LOAD_DWORD %0, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) ... diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir index 96f35605b1c9e1..1ef5c1098b6390 100644 --- a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir @@ -22,10 +22,10 @@ tracksRegLiveness: true body: | bb.0: successors: %bb.1(0x40000000), %bb.2(0x40000000) - %0:vgpr_32 = V_MUL_F32_e32 0, undef %1:vgpr_32, implicit $exec - %2:vgpr_32 = V_CVT_U32_F32_e32 killed %0, implicit $exec - %3:vgpr_32 = V_CVT_F32_I32_e32 killed %2, implicit $exec - %4:vgpr_32 = V_CVT_U32_F32_e32 killed %3, implicit $exec + %0:vgpr_32 = nofpexcept V_MUL_F32_e32 0, undef %1:vgpr_32, implicit $mode, implicit $exec + %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 killed %0, implicit $mode, implicit $exec + %3:vgpr_32 = nofpexcept V_CVT_F32_I32_e32 killed %2, implicit $mode, implicit $exec + %4:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 killed %3, implicit $mode, implicit $exec %5:vgpr_32 = V_LSHRREV_B32_e32 4, killed %4, implicit $exec S_CBRANCH_SCC0 %bb.2, implicit undef $scc @@ -126,7 +126,7 @@ body: | %27.sub6:sgpr_256 = COPY %26 %27.sub7:sgpr_256 = COPY killed %26 %28:vgpr_32 = IMAGE_LOAD_V1_V4 killed %25, killed %27, 2, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable 
load 16 from constant-pool, addrspace 4) - %29:vgpr_32 = V_ADD_F32_e32 0, killed %28, implicit $exec + %29:vgpr_32 = nofpexcept V_ADD_F32_e32 0, killed %28, implicit $mode, implicit $exec $m0 = S_MOV_B32 -1 DS_WRITE_B32 undef %30:vgpr_32, killed %29, 0, 0, implicit $m0, implicit $exec :: (store 4 into `i32 addrspace(3)* undef`, addrspace 3) S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir index 8bcff8a99f45dd..848011a8faac23 100644 --- a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir @@ -41,10 +41,10 @@ body: | bb.2: successors: %bb.4(0x80000000) - %6:vgpr_32 = V_MUL_F32_e32 1031798784, undef %7:vgpr_32, implicit $exec - %8:vgpr_32 = V_FLOOR_F32_e32 killed %6, implicit $exec - %9:vgpr_32 = V_ADD_F32_e32 0, killed %8, implicit $exec - %10:vgpr_32 = V_CVT_U32_F32_e32 killed %9, implicit $exec + %6:vgpr_32 = nofpexcept V_MUL_F32_e32 1031798784, undef %7:vgpr_32, implicit $mode, implicit $exec + %8:vgpr_32 = nofpexcept V_FLOOR_F32_e32 killed %6, implicit $mode, implicit $exec + %9:vgpr_32 = nofpexcept V_ADD_F32_e32 0, killed %8, implicit $mode, implicit $exec + %10:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 killed %9, implicit $mode, implicit $exec %11:vgpr_32 = V_LSHLREV_B32_e32 1, killed %10, implicit $exec %12:sreg_64 = S_MOV_B64 0 %13:sgpr_128 = COPY killed %2 @@ -243,8 +243,8 @@ body: | S_BRANCH %bb.3 bb.17: - %105:vgpr_32 = V_ADD_F32_e32 target-flags(amdgpu-rel32-lo) 0, %20.sub3, implicit $exec - %106:vgpr_32 = V_ADD_F32_e32 target-flags(amdgpu-gotprel32-hi) 0, killed %20.sub2, implicit $exec + %105:vgpr_32 = nofpexcept V_ADD_F32_e32 target-flags(amdgpu-rel32-lo) 0, %20.sub3, implicit $mode, implicit $exec + %106:vgpr_32 = nofpexcept V_ADD_F32_e32 target-flags(amdgpu-gotprel32-hi) 0, killed %20.sub2, implicit $mode, implicit $exec undef %107.sub0:vreg_64 = 
COPY killed %106 %107.sub1:vreg_64 = COPY killed %105 $exec = S_AND_B64 $exec, killed %0, implicit-def dead $scc @@ -258,11 +258,11 @@ body: | %109.sub6:sgpr_256 = COPY %108 %109.sub7:sgpr_256 = COPY killed %108 %110:vgpr_32 = IMAGE_SAMPLE_V1_V2 killed %107, killed %109, undef %111:sgpr_128, 8, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4) - %112:vgpr_32 = V_MUL_F32_e32 0, killed %110, implicit $exec - %113:vgpr_32 = V_MUL_F32_e32 0, killed %112, implicit $exec - %114:vgpr_32 = V_MAD_F32 0, killed %113, 0, 0, 0, 0, 0, 0, implicit $exec - %115:vgpr_32 = V_MAX_F32_e32 0, killed %114, implicit $exec - %116:vgpr_32 = V_CVT_PKRTZ_F16_F32_e64 0, killed %115, 0, 1065353216, 0, 0, implicit $exec + %112:vgpr_32 = nofpexcept V_MUL_F32_e32 0, killed %110, implicit $mode, implicit $exec + %113:vgpr_32 = nofpexcept V_MUL_F32_e32 0, killed %112, implicit $mode, implicit $exec + %114:vgpr_32 = nofpexcept V_MAD_F32 0, killed %113, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %115:vgpr_32 = nofpexcept V_MAX_F32_e32 0, killed %114, implicit $mode, implicit $exec + %116:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, killed %115, 0, 1065353216, 0, 0, implicit $mode, implicit $exec EXP 0, undef %117:vgpr_32, killed %116, undef %118:vgpr_32, undef %119:vgpr_32, -1, -1, 15, implicit $exec S_ENDPGM 0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir index c3a945716f77da..47ecb6c58538f2 100644 --- a/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir @@ -72,9 +72,9 @@ body: | %12:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET killed %11, 0, 0, 0, 0, 0, 0, 0, implicit $exec undef %13.sub1:vreg_128 = COPY %9.sub1 %13.sub2:vreg_128 = COPY %9.sub2 - %14:sreg_64 = V_CMP_GT_F32_e64 0, target-flags(amdgpu-rel32-lo) 0, 0, killed %12.sub3, 0, implicit $exec - %15:vgpr_32 = V_ADD_F32_e32 1065353216, undef %16:vgpr_32, implicit $exec - %17:sreg_64 = V_CMP_GT_F32_e64 0, 0, 0, killed %15, 0, implicit $exec + %14:sreg_64 = nofpexcept V_CMP_GT_F32_e64 0, target-flags(amdgpu-rel32-lo) 0, 0, killed %12.sub3, 0, implicit $mode, implicit $exec + %15:vgpr_32 = nofpexcept V_ADD_F32_e32 1065353216, undef %16:vgpr_32, implicit $mode, implicit $exec + %17:sreg_64 = nofpexcept V_CMP_GT_F32_e64 0, 0, 0, killed %15, 0, implicit $mode, implicit $exec %18:sreg_64 = S_AND_B64 killed %17, killed %14, implicit-def dead $scc %19:sreg_64 = COPY %10 %20:vreg_128 = COPY %13 @@ -127,8 +127,8 @@ body: | bb.13: successors: %bb.14(0x80000000) - %32:vgpr_32 = V_MUL_F32_e32 undef %33:vgpr_32, killed %30.sub1, implicit $exec - %34:vgpr_32 = V_MUL_F32_e32 undef %35:vgpr_32, killed %32, implicit $exec + %32:vgpr_32 = nofpexcept V_MUL_F32_e32 undef %33:vgpr_32, killed %30.sub1, implicit $mode, implicit $exec + %34:vgpr_32 = nofpexcept V_MUL_F32_e32 undef %35:vgpr_32, killed %32, implicit $mode, implicit $exec undef %36.sub0:vreg_128 = COPY %34 %31:vreg_128 = COPY killed %36 @@ -144,30 +144,30 @@ body: | bb.16: successors: %bb.17(0x80000000) - %39:vgpr_32 = V_FMA_F32 0, undef %40:vgpr_32, 0, killed %37.sub0, 0, undef %41:vgpr_32, 0, 0, implicit $exec - %42:vgpr_32 = V_FMA_F32 0, undef %43:vgpr_32, 0, undef %44:vgpr_32, 0, killed %39, 0, 0, implicit $exec - 
%45:vgpr_32 = V_FMA_F32 0, undef %46:vgpr_32, 0, undef %47:vgpr_32, 0, killed %42, 0, 0, implicit $exec - dead %48:vgpr_32 = V_MUL_F32_e32 undef %49:vgpr_32, killed %45, implicit $exec - %50:vgpr_32 = V_MUL_F32_e32 undef %51:vgpr_32, undef %52:vgpr_32, implicit $exec + %39:vgpr_32 = nofpexcept V_FMA_F32 0, undef %40:vgpr_32, 0, killed %37.sub0, 0, undef %41:vgpr_32, 0, 0, implicit $mode, implicit $exec + %42:vgpr_32 = nofpexcept V_FMA_F32 0, undef %43:vgpr_32, 0, undef %44:vgpr_32, 0, killed %39, 0, 0, implicit $mode, implicit $exec + %45:vgpr_32 = nofpexcept V_FMA_F32 0, undef %46:vgpr_32, 0, undef %47:vgpr_32, 0, killed %42, 0, 0, implicit $mode, implicit $exec + dead %48:vgpr_32 = nofpexcept V_MUL_F32_e32 undef %49:vgpr_32, killed %45, implicit $mode, implicit $exec + %50:vgpr_32 = nofpexcept V_MUL_F32_e32 undef %51:vgpr_32, undef %52:vgpr_32, implicit $mode, implicit $exec undef %53.sub1:vreg_128 = COPY %50 %38:vreg_128 = COPY killed %53 bb.17: %54:vreg_128 = COPY killed %38 - %55:vgpr_32 = V_FMA_F32 0, killed %54.sub1, 0, target-flags(amdgpu-gotprel32-lo) 1056964608, 0, 1056964608, 0, 0, implicit $exec + %55:vgpr_32 = nofpexcept V_FMA_F32 0, killed %54.sub1, 0, target-flags(amdgpu-gotprel32-lo) 1056964608, 0, 1056964608, 0, 0, implicit $mode, implicit $exec EXP 1, undef %56:vgpr_32, killed %55, undef %57:vgpr_32, undef %58:vgpr_32, -1, 0, 15, implicit $exec S_ENDPGM 0 bb.18: successors: %bb.7(0x80000000) - dead %59:vgpr_32 = V_FMA_F32 0, killed %9.sub2, 0, undef %60:vgpr_32, 0, undef %61:vgpr_32, 0, 0, implicit $exec + dead %59:vgpr_32 = nofpexcept V_FMA_F32 0, killed %9.sub2, 0, undef %60:vgpr_32, 0, undef %61:vgpr_32, 0, 0, implicit $mode, implicit $exec dead %62:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %63:vgpr_32, undef %64:sgpr_128, undef %65:sreg_32, 0, 0, 0, 0, 0, 0, implicit $exec undef %66.sub1:vreg_128 = COPY %13.sub1 %66.sub2:vreg_128 = COPY %13.sub2 - %67:sreg_64 = V_CMP_NGT_F32_e64 0, 0, 0, undef %68:vgpr_32, 0, implicit $exec - %69:vgpr_32 = 
V_ADD_F32_e32 1065353216, undef %70:vgpr_32, implicit $exec - %71:vgpr_32 = V_ADD_F32_e32 1065353216, killed %69, implicit $exec - %72:sreg_64 = V_CMP_NGT_F32_e64 0, 0, 0, killed %71, 0, implicit $exec + %67:sreg_64 = nofpexcept V_CMP_NGT_F32_e64 0, 0, 0, undef %68:vgpr_32, 0, implicit $mode, implicit $exec + %69:vgpr_32 = nofpexcept V_ADD_F32_e32 1065353216, undef %70:vgpr_32, implicit $mode, implicit $exec + %71:vgpr_32 = nofpexcept V_ADD_F32_e32 1065353216, killed %69, implicit $mode, implicit $exec + %72:sreg_64 = nofpexcept V_CMP_NGT_F32_e64 0, 0, 0, killed %71, 0, implicit $mode, implicit $exec %73:sreg_64 = S_OR_B64 killed %72, killed %67, implicit-def dead $scc %74:sreg_64 = S_OR_B64 killed %73, killed %10, implicit-def dead $scc %19:sreg_64 = COPY killed %74 diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir b/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir index a666428ded9197..85dcacb93ffc50 100644 --- a/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir @@ -48,11 +48,11 @@ body: | %4.sub6:sgpr_256 = COPY %1 %4.sub7:sgpr_256 = COPY killed %1 %5:vgpr_32 = IMAGE_LOAD_V1_V4 killed %3, killed %4, 1, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4) - %6:vgpr_32 = V_MAD_F32 0, killed %5, 0, 0, 0, 0, 0, 0, implicit $exec - %7:vgpr_32 = V_RCP_F32_e32 killed %6, implicit $exec - %8:vgpr_32 = V_MUL_F32_e32 0, killed %7, implicit $exec - %9:vgpr_32 = V_MAD_F32 0, killed %8, 0, 0, 0, 0, 0, 0, implicit $exec - dead %10:vgpr_32 = V_MAC_F32_e32 undef %11:vgpr_32, undef %12:vgpr_32, undef %10, implicit $exec + %6:vgpr_32 = nofpexcept V_MAD_F32 0, killed %5, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %7:vgpr_32 = nofpexcept V_RCP_F32_e32 killed %6, implicit $mode, implicit $exec + %8:vgpr_32 = nofpexcept V_MUL_F32_e32 0, killed %7, implicit $mode, implicit $exec + %9:vgpr_32 = 
nofpexcept V_MAD_F32 0, killed %8, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + dead %10:vgpr_32 = nofpexcept V_MAC_F32_e32 undef %11:vgpr_32, undef %12:vgpr_32, undef %10, implicit $mode, implicit $exec undef %13.sub0:vreg_128 = COPY %9 %14:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec S_CBRANCH_SCC0 %bb.4, implicit undef $scc @@ -65,12 +65,12 @@ body: | bb.4: successors: %bb.5(0x40000000), %bb.7(0x40000000) - %17:vgpr_32 = V_MAD_F32 0, killed %9, 0, 0, 0, 0, 0, 0, implicit $exec - %18:vgpr_32 = V_MIN_F32_e32 1065353216, killed %17, implicit $exec - %19:sreg_64_xexec = V_CMP_NEQ_F32_e64 0, 1065353216, 0, killed %18, 0, implicit $exec + %17:vgpr_32 = nofpexcept V_MAD_F32 0, killed %9, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %18:vgpr_32 = nofpexcept V_MIN_F32_e32 1065353216, killed %17, implicit $mode, implicit $exec + %19:sreg_64_xexec = nofpexcept V_CMP_NEQ_F32_e64 0, 1065353216, 0, killed %18, 0, implicit $mode, implicit $exec %20:vgpr_32 = V_MOV_B32_e32 2143289344, implicit $exec %21:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, killed %20, killed %19, implicit $exec - %22:sreg_64 = V_CMP_LT_F32_e64 0, 0, 0, killed %21, 0, implicit $exec + %22:sreg_64 = nofpexcept V_CMP_LT_F32_e64 0, 0, 0, killed %21, 0, implicit $mode, implicit $exec %23:sreg_64 = COPY $exec, implicit-def $exec %24:sreg_64 = S_AND_B64 %23, %22, implicit-def dead $scc $exec = S_MOV_B64_term killed %24 @@ -140,11 +140,11 @@ body: | bb.14: successors: %bb.15(0x40000000), %bb.16(0x40000000) - %38:vgpr_32 = V_MAD_F32 0, killed %36.sub0, 0, target-flags(amdgpu-gotprel) 0, 0, 0, 0, 0, implicit $exec - %39:vgpr_32 = V_MAD_F32 0, killed %38, 0, 0, 0, 0, 0, 0, implicit $exec - %40:vgpr_32 = V_MAD_F32 0, killed %39, 0, -1090519040, 0, 1056964608, 0, 0, implicit $exec - %41:vgpr_32 = V_MAD_F32 0, killed %40, 0, 0, 0, -1090519040, 0, 0, implicit $exec - %42:vgpr_32 = V_CVT_I32_F32_e32 killed %41, implicit $exec + %38:vgpr_32 = nofpexcept V_MAD_F32 0, killed %36.sub0, 0, target-flags(amdgpu-gotprel) 0, 
0, 0, 0, 0, implicit $mode, implicit $exec + %39:vgpr_32 = nofpexcept V_MAD_F32 0, killed %38, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %40:vgpr_32 = nofpexcept V_MAD_F32 0, killed %39, 0, -1090519040, 0, 1056964608, 0, 0, implicit $mode, implicit $exec + %41:vgpr_32 = nofpexcept V_MAD_F32 0, killed %40, 0, 0, 0, -1090519040, 0, 0, implicit $mode, implicit $exec + %42:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 killed %41, implicit $mode, implicit $exec %43:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %44:sgpr_128, 12, 0, 0 :: (dereferenceable invariant load 4) %45:vgpr_32 = V_MUL_LO_I32 killed %42, killed %43, implicit $exec %46:vgpr_32 = V_LSHLREV_B32_e32 2, killed %45, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/coalescing-subreg-removed-undef-copy.mir b/llvm/test/CodeGen/AMDGPU/coalescing-subreg-removed-undef-copy.mir index c2f903d0114c4b..c08e26d4f30ec2 100644 --- a/llvm/test/CodeGen/AMDGPU/coalescing-subreg-removed-undef-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/coalescing-subreg-removed-undef-copy.mir @@ -158,8 +158,8 @@ body: | %99:vreg_128 = COPY killed %113 %77:sreg_64_xexec = V_CMP_EQ_U32_e64 target-flags(amdgpu-gotprel32-lo) 0, killed %99.sub1, implicit $exec %79:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1065353216, killed %77, implicit $exec - %81:vgpr_32 = V_MUL_F32_e32 0, killed %79, implicit $exec - %82:vgpr_32 = V_CVT_PKRTZ_F16_F32_e64 0, undef %83:vgpr_32, 0, killed %81, 0, 0, implicit $exec + %81:vgpr_32 = V_MUL_F32_e32 0, killed %79, implicit $exec, implicit $mode + %82:vgpr_32 = V_CVT_PKRTZ_F16_F32_e64 0, undef %83:vgpr_32, 0, killed %81, 0, 0, implicit $exec, implicit $mode EXP_DONE 0, undef %84:vgpr_32, killed %82, undef %85:vgpr_32, undef %86:vgpr_32, -1, -1, 15, implicit $exec S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir b/llvm/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir index 1605880a59e41a..91901b7233762f 100644 --- a/llvm/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir +++ 
b/llvm/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir @@ -17,9 +17,9 @@ body: | ; GCN-LABEL: name: _amdgpu_ps_main ; GCN: bb.0: ; GCN: successors: %bb.1(0x80000000) - ; GCN: [[V_TRUNC_F32_e32_:%[0-9]+]]:vgpr_32 = V_TRUNC_F32_e32 undef %4:vgpr_32, implicit $exec - ; GCN: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_U32_F32_e32 [[V_TRUNC_F32_e32_]], implicit $exec - ; GCN: [[V_LSHRREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 4, [[V_CVT_U32_F32_e32_]], implicit $exec + ; GCN: %3:vgpr_32 = nofpexcept V_TRUNC_F32_e32 undef %4:vgpr_32, implicit $mode, implicit $exec + ; GCN: %5:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %3, implicit $mode, implicit $exec + ; GCN: [[V_LSHRREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 4, %5, implicit $exec ; GCN: undef %11.sub0:vreg_128 = V_MUL_LO_I32 [[V_LSHRREV_B32_e32_]], 3, implicit $exec ; GCN: %11.sub3:vreg_128 = COPY %11.sub0 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0 @@ -47,19 +47,19 @@ body: | ; GCN: S_CBRANCH_VCCNZ %bb.4, implicit killed $vcc ; GCN: S_BRANCH %bb.6 ; GCN: bb.5: - ; GCN: [[V_MUL_F32_e32_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e32 target-flags(amdgpu-gotprel) 0, %11.sub0, implicit $exec - ; GCN: [[V_MIN_F32_e32_:%[0-9]+]]:vgpr_32 = V_MIN_F32_e32 1106771968, [[V_MUL_F32_e32_]], implicit $exec - ; GCN: [[V_MAD_F32_:%[0-9]+]]:vgpr_32 = nnan arcp contract reassoc V_MAD_F32 0, [[V_MIN_F32_e32_]], 0, 0, 0, 0, 0, 0, implicit $exec - ; GCN: [[V_MAD_F32_1:%[0-9]+]]:vgpr_32 = nnan arcp contract reassoc V_MAD_F32 0, [[V_MAD_F32_]], 0, 0, 0, 0, 0, 0, implicit $exec - ; GCN: [[V_MAD_F32_2:%[0-9]+]]:vgpr_32 = V_MAD_F32 0, [[V_MAD_F32_1]], 0, 0, 0, 0, 0, 0, implicit $exec - ; GCN: [[V_CVT_PKRTZ_F16_F32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_PKRTZ_F16_F32_e64 0, [[V_MAD_F32_2]], 0, undef %27:vgpr_32, 0, 0, implicit $exec - ; GCN: EXP_DONE 0, [[V_CVT_PKRTZ_F16_F32_e64_]], undef %28:vgpr_32, undef %29:vgpr_32, undef %30:vgpr_32, -1, -1, 15, implicit $exec + ; GCN: %21:vgpr_32 = nofpexcept V_MUL_F32_e32 
target-flags(amdgpu-gotprel) 0, %11.sub0, implicit $mode, implicit $exec + ; GCN: %22:vgpr_32 = nofpexcept V_MIN_F32_e32 1106771968, %21, implicit $mode, implicit $exec + ; GCN: %23:vgpr_32 = nnan arcp contract reassoc nofpexcept V_MAD_F32 0, %22, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN: %24:vgpr_32 = nnan arcp contract reassoc nofpexcept V_MAD_F32 0, %23, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN: %25:vgpr_32 = nofpexcept V_MAD_F32 0, %24, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN: %26:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, %25, 0, undef %27:vgpr_32, 0, 0, implicit $mode, implicit $exec + ; GCN: EXP_DONE 0, %26, undef %28:vgpr_32, undef %29:vgpr_32, undef %30:vgpr_32, -1, -1, 15, implicit $exec ; GCN: S_ENDPGM 0 ; GCN: bb.6: ; GCN: S_ENDPGM 0 bb.0: - %10:vgpr_32 = V_TRUNC_F32_e32 undef %11:vgpr_32, implicit $exec - %12:vgpr_32 = V_CVT_U32_F32_e32 killed %10, implicit $exec + %10:vgpr_32 = nofpexcept V_TRUNC_F32_e32 undef %11:vgpr_32, implicit $mode, implicit $exec + %12:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 killed %10, implicit $mode, implicit $exec %50:vgpr_32 = V_LSHRREV_B32_e32 4, killed %12, implicit $exec %51:vgpr_32 = V_MUL_LO_I32 killed %50, 3, implicit $exec undef %52.sub0:vreg_128 = COPY %51 @@ -102,12 +102,12 @@ body: | S_BRANCH %bb.6 bb.5: - %39:vgpr_32 = V_MUL_F32_e32 target-flags(amdgpu-gotprel) 0, killed %55.sub0, implicit $exec - %41:vgpr_32 = V_MIN_F32_e32 1106771968, killed %39, implicit $exec - %42:vgpr_32 = nnan arcp contract reassoc V_MAD_F32 0, killed %41, 0, 0, 0, 0, 0, 0, implicit $exec - %43:vgpr_32 = nnan arcp contract reassoc V_MAD_F32 0, killed %42, 0, 0, 0, 0, 0, 0, implicit $exec - %44:vgpr_32 = V_MAD_F32 0, killed %43, 0, 0, 0, 0, 0, 0, implicit $exec - %45:vgpr_32 = V_CVT_PKRTZ_F16_F32_e64 0, killed %44, 0, undef %46:vgpr_32, 0, 0, implicit $exec + %39:vgpr_32 = nofpexcept V_MUL_F32_e32 target-flags(amdgpu-gotprel) 0, killed %55.sub0, implicit $mode, implicit $exec + 
%41:vgpr_32 = nofpexcept V_MIN_F32_e32 1106771968, killed %39, implicit $mode, implicit $exec + %42:vgpr_32 = nnan arcp contract reassoc nofpexcept V_MAD_F32 0, killed %41, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %43:vgpr_32 = nnan arcp contract reassoc nofpexcept V_MAD_F32 0, killed %42, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %44:vgpr_32 = nofpexcept V_MAD_F32 0, killed %43, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %45:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e64 0, killed %44, 0, undef %46:vgpr_32, 0, 0, implicit $mode, implicit $exec EXP_DONE 0, killed %45, undef %47:vgpr_32, undef %48:vgpr_32, undef %49:vgpr_32, -1, -1, 15, implicit $exec S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/dead-lane.mir b/llvm/test/CodeGen/AMDGPU/dead-lane.mir index 1477c3302c3405..8e95009e72c6c6 100644 --- a/llvm/test/CodeGen/AMDGPU/dead-lane.mir +++ b/llvm/test/CodeGen/AMDGPU/dead-lane.mir @@ -2,15 +2,15 @@ # GCN-LABEL: name: dead_lane # GCN: bb.0: -# GCN-NEXT: undef %3.sub0:vreg_64 = V_MAC_F32_e32 undef %1:vgpr_32, undef %1:vgpr_32, undef %3.sub0, implicit $exec +# GCN-NEXT: undef %3.sub0:vreg_64 = nofpexcept V_MAC_F32_e32 undef %1:vgpr_32, undef %1:vgpr_32, undef %3.sub0, implicit $mode, implicit $exec # GCN-NEXT: FLAT_STORE_DWORD undef %4:vreg_64, %3.sub0, --- name: dead_lane tracksRegLiveness: true body: | bb.0: - %1:vgpr_32 = V_MAC_F32_e32 undef %0:vgpr_32, undef %0:vgpr_32, undef %0:vgpr_32, implicit $exec - %2:vgpr_32 = V_MAC_F32_e32 undef %0:vgpr_32, undef %0:vgpr_32, undef %0:vgpr_32, implicit $exec + %1:vgpr_32 = nofpexcept V_MAC_F32_e32 undef %0:vgpr_32, undef %0:vgpr_32, undef %0:vgpr_32, implicit $mode, implicit $exec + %2:vgpr_32 = nofpexcept V_MAC_F32_e32 undef %0:vgpr_32, undef %0:vgpr_32, undef %0:vgpr_32, implicit $mode, implicit $exec %3:vreg_64 = REG_SEQUENCE %1:vgpr_32, %subreg.sub0, %2:vgpr_32, %subreg.sub1 FLAT_STORE_DWORD undef %4:vreg_64, %3.sub0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 diff --git 
a/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir b/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir index 3a35c558e6ac42..2743f766ddeeb8 100644 --- a/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir +++ b/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir @@ -32,7 +32,7 @@ body: | ; CHECK: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK: [[V_MUL_F32_e32_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e32 1082130432, [[DEF1]], implicit $exec + ; CHECK: %9:vgpr_32 = nofpexcept V_MUL_F32_e32 1082130432, [[DEF1]], implicit $mode, implicit $exec ; CHECK: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK: bb.1: @@ -48,29 +48,29 @@ body: | ; CHECK: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B32_e32_]] - ; CHECK: [[V_MUL_F32_e32_1:%[0-9]+]]:vgpr_32 = V_MUL_F32_e32 [[DEF7]], [[DEF7]], implicit $exec - ; CHECK: [[V_MUL_F32_e32_2:%[0-9]+]]:vgpr_32 = V_MUL_F32_e32 [[DEF7]], [[DEF7]], implicit $exec - ; CHECK: [[V_MUL_F32_e32_3:%[0-9]+]]:vgpr_32 = V_MUL_F32_e32 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_]], implicit $exec + ; CHECK: %16:vgpr_32 = nofpexcept V_MUL_F32_e32 [[DEF7]], [[DEF7]], implicit $mode, implicit $exec + ; CHECK: %17:vgpr_32 = nofpexcept V_MUL_F32_e32 [[DEF7]], [[DEF7]], implicit $mode, implicit $exec + ; CHECK: %18:vgpr_32 = nofpexcept V_MUL_F32_e32 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_]], implicit $mode, implicit $exec ; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec ; CHECK: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK: [[V_ADD_F32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e32 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_]], implicit $exec - ; CHECK: [[V_MUL_F32_e32_4:%[0-9]+]]:vgpr_32 = V_MUL_F32_e32 [[DEF7]], [[DEF7]], implicit $exec - ; CHECK: dead %23:vgpr_32 = V_MUL_F32_e32 
[[V_MUL_F32_e32_4]], [[DEF13]], implicit $exec - ; CHECK: dead [[V_MOV_B32_e32_1]]:vgpr_32 = V_MAC_F32_e32 [[V_ADD_F32_e32_]], [[COPY]], [[V_MOV_B32_e32_1]], implicit $exec + ; CHECK: %21:vgpr_32 = nofpexcept V_ADD_F32_e32 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_]], implicit $mode, implicit $exec + ; CHECK: %22:vgpr_32 = nofpexcept V_MUL_F32_e32 [[DEF7]], [[DEF7]], implicit $mode, implicit $exec + ; CHECK: dead %23:vgpr_32 = nofpexcept V_MUL_F32_e32 %22, [[DEF13]], implicit $mode, implicit $exec + ; CHECK: dead [[V_MOV_B32_e32_1]]:vgpr_32 = nofpexcept V_MAC_F32_e32 %21, [[COPY]], [[V_MOV_B32_e32_1]], implicit $mode, implicit $exec ; CHECK: [[DEF14:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; CHECK: $sgpr4 = IMPLICIT_DEF ; CHECK: $vgpr0 = COPY [[DEF11]] ; CHECK: $vgpr0 = COPY [[V_MOV_B32_e32_]] ; CHECK: $vgpr1 = COPY [[DEF7]] - ; CHECK: $vgpr0 = COPY [[V_MUL_F32_e32_1]] - ; CHECK: $vgpr1 = COPY [[V_MUL_F32_e32_2]] - ; CHECK: $vgpr2 = COPY [[V_MUL_F32_e32_3]] + ; CHECK: $vgpr0 = COPY %16 + ; CHECK: $vgpr1 = COPY %17 + ; CHECK: $vgpr2 = COPY %18 ; CHECK: dead $sgpr30_sgpr31 = SI_CALL [[DEF14]], @foo, csr_amdgpu_highregs, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $sgpr4, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit-def $vgpr0 - ; CHECK: [[V_ADD_F32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_F32_e32 [[V_MUL_F32_e32_]], [[DEF8]], implicit $exec - ; CHECK: [[V_MAC_F32_e32_:%[0-9]+]]:vgpr_32 = V_MAC_F32_e32 [[DEF12]], [[DEF9]], [[V_MAC_F32_e32_]], implicit $exec - ; CHECK: dead %26:vgpr_32 = V_MAD_F32 0, [[V_MAC_F32_e32_]], 0, [[DEF4]], 0, [[DEF1]], 0, 0, implicit $exec - ; CHECK: dead %27:vgpr_32 = V_MAD_F32 0, [[V_MAC_F32_e32_]], 0, [[DEF5]], 0, [[DEF2]], 0, 0, implicit $exec - ; CHECK: dead %28:vgpr_32 = V_MAD_F32 0, [[V_MAC_F32_e32_]], 0, [[DEF6]], 0, [[DEF3]], 0, 0, implicit $exec + ; CHECK: %25:vgpr_32 = nofpexcept V_ADD_F32_e32 %9, [[DEF8]], implicit $mode, implicit $exec + ; CHECK: %25:vgpr_32 = nofpexcept V_MAC_F32_e32 [[DEF12]], [[DEF9]], %25, implicit 
$mode, implicit $exec + ; CHECK: dead %26:vgpr_32 = nofpexcept V_MAD_F32 0, %25, 0, [[DEF4]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec + ; CHECK: dead %27:vgpr_32 = nofpexcept V_MAD_F32 0, %25, 0, [[DEF5]], 0, [[DEF2]], 0, 0, implicit $mode, implicit $exec + ; CHECK: dead %28:vgpr_32 = nofpexcept V_MAD_F32 0, %25, 0, [[DEF6]], 0, [[DEF3]], 0, 0, implicit $mode, implicit $exec ; CHECK: GLOBAL_STORE_DWORD [[DEF]], [[DEF10]], 0, 0, 0, 0, implicit $exec ; CHECK: S_ENDPGM 0 bb.0: @@ -85,7 +85,7 @@ body: | %6:vgpr_32 = IMPLICIT_DEF %7:vgpr_32 = IMPLICIT_DEF %8:vgpr_32 = IMPLICIT_DEF - %9:vgpr_32 = V_MUL_F32_e32 1082130432, %1, implicit $exec + %9:vgpr_32 = nofpexcept V_MUL_F32_e32 1082130432, %1, implicit $mode, implicit $exec %10:vgpr_32 = IMPLICIT_DEF %11:vgpr_32 = IMPLICIT_DEF @@ -106,15 +106,15 @@ body: | %13:vgpr_32 = COPY %12 %14:vgpr_32 = IMPLICIT_DEF %15:vgpr_32 = IMPLICIT_DEF - %16:vgpr_32 = V_MUL_F32_e32 %7, %7, implicit $exec - %17:vgpr_32 = V_MUL_F32_e32 %7, %7, implicit $exec - %18:vgpr_32 = V_MUL_F32_e32 %12, %12, implicit $exec + %16:vgpr_32 = nofpexcept V_MUL_F32_e32 %7, %7, implicit $mode, implicit $exec + %17:vgpr_32 = nofpexcept V_MUL_F32_e32 %7, %7, implicit $mode, implicit $exec + %18:vgpr_32 = nofpexcept V_MUL_F32_e32 %12, %12, implicit $mode, implicit $exec %19:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec %20:vgpr_32 = IMPLICIT_DEF - %21:vgpr_32 = V_ADD_F32_e32 %12, %12, implicit $exec - %22:vgpr_32 = V_MUL_F32_e32 %7, %7, implicit $exec - %23:vgpr_32 = V_MUL_F32_e32 %22, %20, implicit $exec - %19:vgpr_32 = V_MAC_F32_e32 %21, %13, %19, implicit $exec + %21:vgpr_32 = nofpexcept V_ADD_F32_e32 %12, %12, implicit $mode, implicit $exec + %22:vgpr_32 = nofpexcept V_MUL_F32_e32 %7, %7, implicit $mode, implicit $exec + %23:vgpr_32 = nofpexcept V_MUL_F32_e32 %22, %20, implicit $mode, implicit $exec + %19:vgpr_32 = nofpexcept V_MAC_F32_e32 %21, %13, %19, implicit $mode, implicit $exec %24:sreg_64 = IMPLICIT_DEF $vgpr0 = COPY %14 $vgpr0 = COPY 
%12 @@ -124,11 +124,11 @@ body: | $vgpr2 = COPY %18 $sgpr4 = IMPLICIT_DEF dead $sgpr30_sgpr31 = SI_CALL %24, @foo, csr_amdgpu_highregs, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $sgpr4, implicit killed $vgpr0, implicit killed $vgpr1, implicit killed $vgpr2, implicit-def $vgpr0 - %25:vgpr_32 = V_ADD_F32_e32 %9, %8, implicit $exec - %25:vgpr_32 = V_MAC_F32_e32 %15, %10, %25, implicit $exec - %26:vgpr_32 = V_MAD_F32 0, %25, 0, %4, 0, %1, 0, 0, implicit $exec - %27:vgpr_32 = V_MAD_F32 0, %25, 0, %5, 0, %2, 0, 0, implicit $exec - %28:vgpr_32 = V_MAD_F32 0, %25, 0, %6, 0, %3, 0, 0, implicit $exec + %25:vgpr_32 = nofpexcept V_ADD_F32_e32 %9, %8, implicit $mode, implicit $exec + %25:vgpr_32 = nofpexcept V_MAC_F32_e32 %15, %10, %25, implicit $mode, implicit $exec + %26:vgpr_32 = nofpexcept V_MAD_F32 0, %25, 0, %4, 0, %1, 0, 0, implicit $mode, implicit $exec + %27:vgpr_32 = nofpexcept V_MAD_F32 0, %25, 0, %5, 0, %2, 0, 0, implicit $mode, implicit $exec + %28:vgpr_32 = nofpexcept V_MAD_F32 0, %25, 0, %6, 0, %3, 0, 0, implicit $mode, implicit $exec GLOBAL_STORE_DWORD %0, %11, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir index 859c21d8842fbe..358a331da1a5db 100644 --- a/llvm/test/CodeGen/AMDGPU/dpp_combine.mir +++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.mir @@ -298,10 +298,10 @@ body: | # check for floating point modifiers # GCN-LABEL: name: add_f32_e64 # GCN: %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec -# GCN: %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $exec -# GCN: %6:vgpr_32 = V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $exec -# GCN: %8:vgpr_32 = V_ADD_F32_dpp %2, 1, %1, 2, %0, 1, 15, 15, 1, implicit $exec -# GCN: %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $exec +# GCN: %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec +# GCN: %6:vgpr_32 = V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 
1, implicit $mode, implicit $exec +# GCN: %8:vgpr_32 = V_ADD_F32_dpp %2, 1, %1, 2, %0, 1, 15, 15, 1, implicit $mode, implicit $exec +# GCN: %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec name: add_f32_e64 tracksRegLiveness: true @@ -315,19 +315,19 @@ body: | ; this shouldn't be combined as omod is set %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec - %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $exec + %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec ; this should be combined as all modifiers are default %5:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec - %6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $exec + %6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $mode, implicit $exec ; this should be combined as modifiers other than abs|neg are default %7:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec - %8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $exec + %8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $mode, implicit $exec ; this shouldn't be combined as modifiers aren't abs|neg %9:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec - %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $exec + %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec ... # check for e64 modifiers @@ -532,73 +532,73 @@ body: | # Test instruction which does not have modifiers in VOP1 form but does in DPP form. 
# GCN-LABEL: name: dpp_vop1 -# GCN: %3:vgpr_32 = V_CEIL_F32_dpp %0, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $exec +# GCN: %3:vgpr_32 = V_CEIL_F32_dpp %0, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec name: dpp_vop1 tracksRegLiveness: true body: | bb.0: %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec - %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $exec + %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec ... # Test instruction which does not have modifiers in VOP2 form but does in DPP form. # GCN-LABEL: name: dpp_min -# GCN: %3:vgpr_32 = V_MIN_F32_dpp %0, 0, undef %2:vgpr_32, 0, undef %4:vgpr_32, 1, 15, 15, 1, implicit $exec +# GCN: %3:vgpr_32 = V_MIN_F32_dpp %0, 0, undef %2:vgpr_32, 0, undef %4:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec name: dpp_min tracksRegLiveness: true body: | bb.0: %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec - %4:vgpr_32 = V_MIN_F32_e32 %2, undef %3:vgpr_32, implicit $exec + %4:vgpr_32 = V_MIN_F32_e32 %2, undef %3:vgpr_32, implicit $mode, implicit $exec ... # Test an undef old operand # GCN-LABEL: name: dpp_undef_old -# GCN: %3:vgpr_32 = V_CEIL_F32_dpp undef %1:vgpr_32, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $exec +# GCN: %3:vgpr_32 = V_CEIL_F32_dpp undef %1:vgpr_32, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec name: dpp_undef_old tracksRegLiveness: true body: | bb.0: %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec - %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $exec + %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec ... # Do not combine a dpp mov which writes a physreg. 
# GCN-LABEL: name: phys_dpp_mov_dst # GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec -# GCN: %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $exec +# GCN: %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec name: phys_dpp_mov_dst tracksRegLiveness: true body: | bb.0: $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec - %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $exec + %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec ... # Do not combine a dpp mov which reads a physreg. # GCN-LABEL: name: phys_dpp_mov_old_src # GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec -# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $exec +# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec name: phys_dpp_mov_old_src tracksRegLiveness: true body: | bb.0: %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec - %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $exec + %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec ... # Do not combine a dpp mov which reads a physreg. # GCN-LABEL: name: phys_dpp_mov_src # GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec -# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $exec +# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec name: phys_dpp_mov_src tracksRegLiveness: true body: | bb.0: %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec - %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $exec + %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec ... 
# GCN-LABEL: name: dpp_reg_sequence_both_combined @@ -817,7 +817,7 @@ body: | # Make sure flags aren't dropped # GCN-LABEL: name: flags_add_f32_e64 -# GCN: %4:vgpr_32 = nnan nofpexcept V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $exec +# GCN: %4:vgpr_32 = nnan nofpexcept V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec name: flags_add_f32_e64 tracksRegLiveness: true body: | @@ -829,7 +829,7 @@ body: | %2:vgpr_32 = IMPLICIT_DEF %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec - %4:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $exec + %4:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec S_ENDPGM 0, implicit %4 ... diff --git a/llvm/test/CodeGen/AMDGPU/endpgm-dce.mir b/llvm/test/CodeGen/AMDGPU/endpgm-dce.mir index baa54b492f610e..95a878c1997ff0 100644 --- a/llvm/test/CodeGen/AMDGPU/endpgm-dce.mir +++ b/llvm/test/CodeGen/AMDGPU/endpgm-dce.mir @@ -18,7 +18,7 @@ body: | %3 = IMPLICIT_DEF $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) - %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $exec + %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $mode, implicit $exec %4 = S_ADD_U32 %3, 1, implicit-def $scc S_ENDPGM 0 ... @@ -42,7 +42,7 @@ body: | %3 = IMPLICIT_DEF $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr - %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $exec + %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $mode, implicit $exec %4 = S_ADD_U32 %3, 1, implicit-def $scc S_ENDPGM 0 ... 
@@ -66,7 +66,7 @@ body: | %3 = IMPLICIT_DEF $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4) - %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $exec + %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $mode, implicit $exec %4 = S_ADD_U32 %3, 1, implicit-def $scc S_ENDPGM 0 ... @@ -173,7 +173,7 @@ body: | bb.1: %0 = IMPLICIT_DEF %2 = IMPLICIT_DEF - %1 = V_ADD_F32_e64 0, killed %0, 0, 1, 0, 0, implicit $exec + %1 = V_ADD_F32_e64 0, killed %0, 0, 1, 0, 0, implicit $mode, implicit $exec %3 = S_ADD_U32 %2, 1, implicit-def $scc S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir index 394df72a1c8309..7bc14939624d89 100644 --- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir +++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir @@ -11,7 +11,7 @@ body: | %1:sreg_32 = IMPLICIT_DEF %2:sreg_32 = IMPLICIT_DEF %3:sreg_32 = IMPLICIT_DEF - %4:vgpr_32 = V_CVT_U32_F32_e64 0, %0:vgpr_32, 0, 0, implicit $exec + %4:vgpr_32 = V_CVT_U32_F32_e64 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec %5:sreg_32 = COPY %4:vgpr_32 %6:sreg_32 = S_ADD_I32 %2:sreg_32, %5:sreg_32, implicit-def $scc %7:sreg_32 = S_ADDC_U32 %3:sreg_32, %1:sreg_32, implicit-def $scc, implicit $scc diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir index e76f1be6c485ba..b81556c94ccea9 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir @@ -111,7 +111,7 @@ # literal constant. 
# CHECK-LABEL: name: add_f32_1.0_one_f16_use -# CHECK: %13:vgpr_32 = V_ADD_F16_e32 1065353216, killed %11, implicit $exec +# CHECK: %13:vgpr_32 = V_ADD_F16_e32 1065353216, killed %11, implicit $mode, implicit $exec name: add_f32_1.0_one_f16_use alignment: 1 @@ -160,7 +160,7 @@ body: | %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) %12 = V_MOV_B32_e32 1065353216, implicit $exec - %13 = V_ADD_F16_e64 0, killed %11, 0, %12, 0, 0, implicit $exec + %13 = V_ADD_F16_e64 0, killed %11, 0, %12, 0, 0, implicit $mode, implicit $exec BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) S_ENDPGM 0 @@ -171,8 +171,8 @@ body: | # CHECK-LABEL: name: add_f32_1.0_multi_f16_use # CHECK: %13:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec -# CHECK: %14:vgpr_32 = V_ADD_F16_e32 killed %11, %13, implicit $exec -# CHECK: %15:vgpr_32 = V_ADD_F16_e32 killed %12, killed %13, implicit $exec +# CHECK: %14:vgpr_32 = V_ADD_F16_e32 killed %11, %13, implicit $mode, implicit $exec +# CHECK: %15:vgpr_32 = V_ADD_F16_e32 killed %12, killed %13, implicit $mode, implicit $exec name: add_f32_1.0_multi_f16_use @@ -225,8 +225,8 @@ body: | %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) %13 = V_MOV_B32_e32 1065353216, implicit $exec - %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $exec - %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $exec + %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $mode, implicit $exec + %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $mode, implicit $exec BUFFER_STORE_SHORT_OFFSET killed %14, 
%10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) S_ENDPGM 0 @@ -238,8 +238,8 @@ body: | # immediate, and folded into the single f16 use as a literal constant # CHECK-LABEL: name: add_f32_1.0_one_f32_use_one_f16_use -# CHECK: %15:vgpr_32 = V_ADD_F16_e32 1065353216, %11, implicit $exec -# CHECK: %16:vgpr_32 = V_ADD_F32_e32 1065353216, killed %13, implicit $exec +# CHECK: %15:vgpr_32 = V_ADD_F16_e32 1065353216, %11, implicit $mode, implicit $exec +# CHECK: %16:vgpr_32 = V_ADD_F32_e32 1065353216, killed %13, implicit $mode, implicit $exec name: add_f32_1.0_one_f32_use_one_f16_use alignment: 1 @@ -293,8 +293,8 @@ body: | %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) %14 = V_MOV_B32_e32 1065353216, implicit $exec - %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec - %16 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec + %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $mode, implicit $exec + %16 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $mode, implicit $exec BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) S_ENDPGM 0 @@ -307,9 +307,9 @@ body: | # CHECK-LABEL: name: add_f32_1.0_one_f32_use_multi_f16_use # CHECK: %14:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec -# CHECK: %15:vgpr_32 = V_ADD_F16_e32 %11, %14, implicit $exec -# CHECK: %16:vgpr_32 = V_ADD_F16_e32 %12, %14, implicit $exec -# CHECK: %17:vgpr_32 = V_ADD_F32_e32 
1065353216, killed %13, implicit $exec +# CHECK: %15:vgpr_32 = V_ADD_F16_e32 %11, %14, implicit $mode, implicit $exec +# CHECK: %16:vgpr_32 = V_ADD_F16_e32 %12, %14, implicit $mode, implicit $exec +# CHECK: %17:vgpr_32 = V_ADD_F32_e32 1065353216, killed %13, implicit $mode, implicit $exec name: add_f32_1.0_one_f32_use_multi_f16_use alignment: 1 @@ -364,9 +364,9 @@ body: | %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) %14 = V_MOV_B32_e32 1065353216, implicit $exec - %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec - %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $exec - %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec + %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $mode, implicit $exec + %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $mode, implicit $exec + %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $mode, implicit $exec BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) @@ -376,8 +376,8 @@ body: | --- # CHECK-LABEL: name: add_i32_1_multi_f16_use # CHECK: %13:vgpr_32 = V_MOV_B32_e32 1, implicit $exec -# CHECK: %14:vgpr_32 = V_ADD_F16_e32 1, killed %11, implicit $exec -# CHECK: %15:vgpr_32 = V_ADD_F16_e32 1, killed %12, implicit $exec +# CHECK: %14:vgpr_32 = V_ADD_F16_e32 1, killed %11, implicit $mode, implicit $exec +# CHECK: %15:vgpr_32 = V_ADD_F16_e32 1, killed %12, implicit $mode, implicit $exec name: add_i32_1_multi_f16_use @@ -430,8 +430,8 @@ body: | 
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) %13 = V_MOV_B32_e32 1, implicit $exec - %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $exec - %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $exec + %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $mode, implicit $exec + %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $mode, implicit $exec BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) S_ENDPGM 0 @@ -441,9 +441,9 @@ body: | # CHECK-LABEL: name: add_i32_m2_one_f32_use_multi_f16_use # CHECK: %14:vgpr_32 = V_MOV_B32_e32 -2, implicit $exec -# CHECK: %15:vgpr_32 = V_ADD_F16_e32 -2, %11, implicit $exec -# CHECK: %16:vgpr_32 = V_ADD_F16_e32 -2, %12, implicit $exec -# CHECK: %17:vgpr_32 = V_ADD_F32_e32 -2, killed %13, implicit $exec +# CHECK: %15:vgpr_32 = V_ADD_F16_e32 -2, %11, implicit $mode, implicit $exec +# CHECK: %16:vgpr_32 = V_ADD_F16_e32 -2, %12, implicit $mode, implicit $exec +# CHECK: %17:vgpr_32 = V_ADD_F32_e32 -2, killed %13, implicit $mode, implicit $exec name: add_i32_m2_one_f32_use_multi_f16_use alignment: 1 @@ -498,9 +498,9 @@ body: | %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) %14 = V_MOV_B32_e32 -2, implicit $exec - %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec - %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $exec - %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 
0, 0, implicit $exec + %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $mode, implicit $exec + %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $mode, implicit $exec + %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $mode, implicit $exec BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) @@ -514,8 +514,8 @@ body: | # CHECK-LABEL: name: add_f16_1.0_multi_f32_use # CHECK: %13:vgpr_32 = V_MOV_B32_e32 15360, implicit $exec -# CHECK: %14:vgpr_32 = V_ADD_F32_e32 %11, %13, implicit $exec -# CHECK: %15:vgpr_32 = V_ADD_F32_e32 %12, %13, implicit $exec +# CHECK: %14:vgpr_32 = V_ADD_F32_e32 %11, %13, implicit $mode, implicit $exec +# CHECK: %15:vgpr_32 = V_ADD_F32_e32 %12, %13, implicit $mode, implicit $exec name: add_f16_1.0_multi_f32_use alignment: 1 @@ -567,8 +567,8 @@ body: | %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) %13 = V_MOV_B32_e32 15360, implicit $exec - %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $exec - %15 = V_ADD_F32_e64 0, %12, 0, %13, 0, 0, implicit $exec + %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $mode, implicit $exec + %15 = V_ADD_F32_e64 0, %12, 0, %13, 0, 0, implicit $mode, implicit $exec BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) S_ENDPGM 0 @@ 
-581,8 +581,8 @@ body: | # CHECK-LABEL: name: add_f16_1.0_other_high_bits_multi_f16_use # CHECK: %13:vgpr_32 = V_MOV_B32_e32 80886784, implicit $exec -# CHECK: %14:vgpr_32 = V_ADD_F16_e32 %11, %13, implicit $exec -# CHECK: %15:vgpr_32 = V_ADD_F16_e32 %12, %13, implicit $exec +# CHECK: %14:vgpr_32 = V_ADD_F16_e32 %11, %13, implicit $mode, implicit $exec +# CHECK: %15:vgpr_32 = V_ADD_F16_e32 %12, %13, implicit $mode, implicit $exec name: add_f16_1.0_other_high_bits_multi_f16_use alignment: 1 @@ -634,8 +634,8 @@ body: | %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) %13 = V_MOV_B32_e32 80886784, implicit $exec - %14 = V_ADD_F16_e64 0, %11, 0, %13, 0, 0, implicit $exec - %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $exec + %14 = V_ADD_F16_e64 0, %11, 0, %13, 0, 0, implicit $mode, implicit $exec + %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $mode, implicit $exec BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) S_ENDPGM 0 @@ -648,8 +648,8 @@ body: | # CHECK-LABEL: name: add_f16_1.0_other_high_bits_use_f16_f32 # CHECK: %13:vgpr_32 = V_MOV_B32_e32 305413120, implicit $exec -# CHECK: %14:vgpr_32 = V_ADD_F32_e32 %11, %13, implicit $exec -# CHECK: %15:vgpr_32 = V_ADD_F16_e32 %12, %13, implicit $exec +# CHECK: %14:vgpr_32 = V_ADD_F32_e32 %11, %13, implicit $mode, implicit $exec +# CHECK: %15:vgpr_32 = V_ADD_F16_e32 %12, %13, implicit $mode, implicit $exec name: add_f16_1.0_other_high_bits_use_f16_f32 alignment: 1 exposesReturnsTwice: false @@ -700,8 +700,8 @@ body: | %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile 
load 4 from `float addrspace(1)* undef`) %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) %13 = V_MOV_B32_e32 305413120, implicit $exec - %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $exec - %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $exec + %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $mode, implicit $exec + %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $mode, implicit $exec BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir b/llvm/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir index e26f0c934fce46..4eef4d64770100 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir @@ -2,7 +2,7 @@ ... 
# GCN-LABEL: name: no_fold_imm_madak_mac_clamp_f32 # GCN: %23:vgpr_32 = V_MOV_B32_e32 1090519040, implicit $exec -# GCN-NEXT: %24:vgpr_32 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit $exec +# GCN-NEXT: %24:vgpr_32 = nofpexcept V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit $mode, implicit $exec name: no_fold_imm_madak_mac_clamp_f32 tracksRegLiveness: true @@ -64,7 +64,7 @@ body: | %22 = COPY %29 %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, 0, implicit $exec %23 = V_MOV_B32_e32 1090519040, implicit $exec - %24 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit $exec + %24 = nofpexcept V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit $mode, implicit $exec %26 = COPY %29 BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 @@ -73,7 +73,7 @@ body: | --- # GCN-LABEL: name: no_fold_imm_madak_mac_omod_f32 # GCN: %23:vgpr_32 = V_MOV_B32_e32 1090519040, implicit $exec -# GCN: %24:vgpr_32 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 0, 2, implicit $exec +# GCN: %24:vgpr_32 = nofpexcept V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 0, 2, implicit $mode, implicit $exec name: no_fold_imm_madak_mac_omod_f32 tracksRegLiveness: true @@ -135,7 +135,7 @@ body: | %22 = COPY %29 %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, 0, implicit $exec %23 = V_MOV_B32_e32 1090519040, implicit $exec - %24 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 0, 2, implicit $exec + %24 = nofpexcept V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 0, 2, implicit $mode, implicit $exec %26 = COPY %29 BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 @@ -144,7 +144,7 @@ body: | --- # GCN: name: no_fold_imm_madak_mad_clamp_f32 # GCN: %23:vgpr_32 = V_MOV_B32_e32 1090519040, implicit $exec -# GCN: %24:vgpr_32 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 1, 0, 
implicit $exec +# GCN: %24:vgpr_32 = nofpexcept V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit $mode, implicit $exec name: no_fold_imm_madak_mad_clamp_f32 tracksRegLiveness: true @@ -206,7 +206,7 @@ body: | %22 = COPY %29 %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, 0, implicit $exec %23 = V_MOV_B32_e32 1090519040, implicit $exec - %24 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit $exec + %24 = nofpexcept V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit $mode, implicit $exec %26 = COPY %29 BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 @@ -215,7 +215,7 @@ body: | --- # GCN: name: no_fold_imm_madak_mad_omod_f32 # GCN: %23:vgpr_32 = V_MOV_B32_e32 1090519040, implicit $exec -# GCN: %24:vgpr_32 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 0, 1, implicit $exec +# GCN: %24:vgpr_32 = nofpexcept V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 0, 1, implicit $mode, implicit $exec name: no_fold_imm_madak_mad_omod_f32 tracksRegLiveness: true @@ -277,7 +277,7 @@ body: | %22 = COPY %29 %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, 0, implicit $exec %23 = V_MOV_B32_e32 1090519040, implicit $exec - %24 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 0, 1, implicit $exec + %24 = nofpexcept V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 0, 1, implicit $mode, implicit $exec %26 = COPY %29 BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir index 6f0e6e39eea80a..1b87ef241cacea 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir @@ -39,7 +39,7 @@ body: | # GCN-LABEL: name: fma_sgpr_use # GCN: %0:sreg_64_xexec = IMPLICIT_DEF -# GCN-NEXT: %4:vgpr_32 = nnan ninf nsz arcp contract afn reassoc V_FMA_F32 2, %0.sub0, 0, 1073741824, 0, 
%0.sub1, 0, 0, implicit $exec +# GCN-NEXT: %4:vgpr_32 = nnan ninf nsz arcp contract afn reassoc V_FMA_F32 2, %0.sub0, 0, 1073741824, 0, %0.sub1, 0, 0, implicit $mode, implicit $exec --- name: fma_sgpr_use body: | @@ -48,6 +48,6 @@ body: | %1:sgpr_32 = COPY %0.sub0 %2:sgpr_32 = COPY %0.sub1 %3:vgpr_32 = COPY %2 - %4:vgpr_32 = nnan ninf nsz arcp contract afn reassoc V_FMAC_F32_e64 2, %1, 0, 1073741824, 0, %3, 0, 0, implicit $exec + %4:vgpr_32 = nnan ninf nsz arcp contract afn reassoc V_FMAC_F32_e64 2, %1, 0, 1073741824, 0, %3, 0, 0, implicit $mode, implicit $exec DS_WRITE2_B32_gfx9 undef %5:vgpr_32, killed %4, undef %6:vgpr_32, 0, 1, 0, implicit $exec ... diff --git a/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir b/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir index f1b5ee3524d952..5f4e6830eb44f9 100644 --- a/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir +++ b/llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir @@ -11,7 +11,7 @@ name: flat_atomic_fcmpswap_to_s_denorm_mode body: | bb.0: FLAT_ATOMIC_FCMPSWAP undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... # GCN-LABEL: name: flat_atomic_fcmpswap_x2_to_s_denorm_mode @@ -25,7 +25,7 @@ name: flat_atomic_fcmpswap_x2_to_s_denorm_mode body: | bb.0: FLAT_ATOMIC_FCMPSWAP_X2 undef %0:vreg_64, undef %1:vreg_128, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... 
# GCN-LABEL: name: flat_atomic_fmax_to_s_denorm_mode @@ -39,7 +39,7 @@ name: flat_atomic_fmax_to_s_denorm_mode body: | bb.0: FLAT_ATOMIC_FMAX undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... # GCN-LABEL: name: flat_atomic_fmax_x2_to_s_denorm_mode @@ -53,7 +53,7 @@ name: flat_atomic_fmax_x2_to_s_denorm_mode body: | bb.0: FLAT_ATOMIC_FMAX_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... # GCN-LABEL: name: flat_atomic_fmin_to_s_denorm_mode @@ -67,7 +67,7 @@ name: flat_atomic_fmin_to_s_denorm_mode body: | bb.0: FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... # GCN-LABEL: name: flat_atomic_fmin_x2_to_s_denorm_mode @@ -81,7 +81,7 @@ name: flat_atomic_fmin_x2_to_s_denorm_mode body: | bb.0: FLAT_ATOMIC_FMIN_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... # GCN-LABEL: name: flat_atomic_fcmpswap_x2_rtn_to_s_denorm_mode @@ -95,7 +95,7 @@ name: flat_atomic_fcmpswap_x2_rtn_to_s_denorm_mode body: | bb.0: %2:vreg_64 = FLAT_ATOMIC_FCMPSWAP_X2_RTN undef %0:vreg_64, undef %1:vreg_128, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... 
# GCN-LABEL: name: flat_atomic_fmax_rtn_to_s_denorm_mode @@ -109,7 +109,7 @@ name: flat_atomic_fmax_rtn_to_s_denorm_mode body: | bb.0: %2:vgpr_32 = FLAT_ATOMIC_FMAX_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... # GCN-LABEL: name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode @@ -123,7 +123,7 @@ name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode body: | bb.0: %2:vreg_64 = FLAT_ATOMIC_FMAX_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... # GCN-LABEL: name: flat_atomic_fmin_rtn_to_s_denorm_mode @@ -137,7 +137,7 @@ name: flat_atomic_fmin_rtn_to_s_denorm_mode body: | bb.0: %2:vgpr_32 = FLAT_ATOMIC_FMIN_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... # GCN-LABEL: name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode @@ -151,7 +151,7 @@ name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode body: | bb.0: %2:vreg_64 = FLAT_ATOMIC_FMIN_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... 
# GCN-LABEL: name: flat_atomic_fcmpswap_rtn_to_s_denorm_mode @@ -165,7 +165,7 @@ name: flat_atomic_fcmpswap_rtn_to_s_denorm_mode body: | bb.0: %2:vgpr_32 = FLAT_ATOMIC_FCMPSWAP_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... # GCN-LABEL: name: global_atomic_fcmpswap_to_s_denorm_mode @@ -179,7 +179,7 @@ name: global_atomic_fcmpswap_to_s_denorm_mode body: | bb.0: GLOBAL_ATOMIC_FCMPSWAP undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... # GCN-LABEL: name: global_atomic_fcmpswap_x2_to_s_denorm_mode @@ -193,7 +193,7 @@ name: global_atomic_fcmpswap_x2_to_s_denorm_mode body: | bb.0: GLOBAL_ATOMIC_FCMPSWAP_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... # GCN-LABEL: name: global_atomic_fmax_to_s_denorm_mode @@ -207,7 +207,7 @@ name: global_atomic_fmax_to_s_denorm_mode body: | bb.0: GLOBAL_ATOMIC_FMAX undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... # GCN-LABEL: name: global_atomic_fmax_x2_to_s_denorm_mode @@ -221,7 +221,7 @@ name: global_atomic_fmax_x2_to_s_denorm_mode body: | bb.0: GLOBAL_ATOMIC_FMAX_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... 
# GCN-LABEL: name: global_atomic_fmin_to_s_denorm_mode @@ -235,7 +235,7 @@ name: global_atomic_fmin_to_s_denorm_mode body: | bb.0: GLOBAL_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... # GCN-LABEL: name: global_atomic_fmin_x2_to_s_denorm_mode @@ -249,7 +249,7 @@ name: global_atomic_fmin_x2_to_s_denorm_mode body: | bb.0: GLOBAL_ATOMIC_FMIN_X2 undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... # GCN-LABEL: name: global_atomic_fcmpswap_rtn_to_s_denorm_mode @@ -263,7 +263,7 @@ name: global_atomic_fcmpswap_rtn_to_s_denorm_mode body: | bb.0: %2:vgpr_32 = GLOBAL_ATOMIC_FCMPSWAP_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... # GCN-LABEL: name: global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode @@ -277,7 +277,7 @@ name: global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode body: | bb.0: %2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... # GCN-LABEL: name: global_atomic_fmax_rtn_to_s_denorm_mode @@ -291,7 +291,7 @@ name: global_atomic_fmax_rtn_to_s_denorm_mode body: | bb.0: %2:vgpr_32 = GLOBAL_ATOMIC_FMAX_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... 
# GCN-LABEL: name: global_atomic_fmax_x2_rtn_to_s_denorm_mode @@ -305,7 +305,7 @@ name: global_atomic_fmax_x2_rtn_to_s_denorm_mode body: | bb.0: %2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... # GCN-LABEL: name: global_atomic_fmin_rtn_to_s_denorm_mode @@ -319,7 +319,7 @@ name: global_atomic_fmin_rtn_to_s_denorm_mode body: | bb.0: %2:vgpr_32 = GLOBAL_ATOMIC_FMIN_RTN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... # GCN-LABEL: name: global_atomic_fmin_x2_rtn_to_s_denorm_mode @@ -333,7 +333,7 @@ name: global_atomic_fmin_x2_rtn_to_s_denorm_mode body: | bb.0: %2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_RTN undef %0:vreg_64, undef %1:vreg_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... # GCN-LABEL: name: global_atomic_fcmpswap_saddr_to_s_denorm_mode @@ -347,7 +347,7 @@ name: global_atomic_fcmpswap_saddr_to_s_denorm_mode body: | bb.0: GLOBAL_ATOMIC_FCMPSWAP_SADDR undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... 
# GCN-LABEL: name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode @@ -361,7 +361,7 @@ name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode body: | bb.0: %2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... # GCN-LABEL: name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode @@ -375,7 +375,7 @@ name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode body: | bb.0: %2:vgpr_32 = GLOBAL_ATOMIC_FMAX_SADDR_RTN undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... # GCN-LABEL: name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode @@ -389,7 +389,7 @@ name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode body: | bb.0: %2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... # GCN-LABEL: name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode @@ -403,7 +403,7 @@ name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode body: | bb.0: %2:vgpr_32 = GLOBAL_ATOMIC_FMIN_SADDR_RTN undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... 
# GCN-LABEL: name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode @@ -417,7 +417,7 @@ name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode body: | bb.0: %2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... # GCN-LABEL: name: flat_fp_atomic_to_s_denorm_mode_waitcnt @@ -430,7 +430,7 @@ body: | bb.0: FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) S_WAITCNT 0 - S_DENORM_MODE 0 + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... # GCN-LABEL: name: flat_fp_atomic_to_s_denorm_mode_valu @@ -442,6 +442,6 @@ name: flat_fp_atomic_to_s_denorm_mode_valu body: | bb.0: FLAT_ATOMIC_FMIN undef %0:vreg_64, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`) - %2:vgpr_32 = V_ADD_F32_e32 undef %1:vgpr_32, undef %1:vgpr_32, implicit $exec - S_DENORM_MODE 0 + %2:vgpr_32 = V_ADD_F32_e32 undef %1:vgpr_32, undef %1:vgpr_32, implicit $mode, implicit $exec + S_DENORM_MODE 0, implicit-def $mode, implicit $mode ... 
diff --git a/llvm/test/CodeGen/AMDGPU/hazard-buffer-store-v-interp.mir b/llvm/test/CodeGen/AMDGPU/hazard-buffer-store-v-interp.mir index bd6244127e6f9f..a8c82e6cf2545b 100644 --- a/llvm/test/CodeGen/AMDGPU/hazard-buffer-store-v-interp.mir +++ b/llvm/test/CodeGen/AMDGPU/hazard-buffer-store-v-interp.mir @@ -11,9 +11,9 @@ name: hazard_buffer_store_v_interp body: | bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr7, $vgpr8, $vgpr9, $vgpr10 - + BUFFER_STORE_DWORDX4_OFFSET_exact killed $vgpr7_vgpr8_vgpr9_vgpr10, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 96, 0, 0, 0, 0, 0, implicit $exec - $vgpr7 = V_INTERP_P1_F32 $vgpr0, 0, 0, implicit $m0, implicit $exec + $vgpr7 = V_INTERP_P1_F32 $vgpr0, 0, 0, implicit $mode, implicit $m0, implicit $exec S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/hazard-hidden-bundle.mir b/llvm/test/CodeGen/AMDGPU/hazard-hidden-bundle.mir index d0f32f287473c4..830e9aa340fd86 100644 --- a/llvm/test/CodeGen/AMDGPU/hazard-hidden-bundle.mir +++ b/llvm/test/CodeGen/AMDGPU/hazard-hidden-bundle.mir @@ -32,7 +32,7 @@ body: | S_SENDMSG 3, implicit $exec, implicit $m0 $m0 = S_MOV_B32 $sgpr8 BUNDLE implicit-def $vgpr0 { - $vgpr0 = V_INTERP_P1_F32 killed $vgpr4, 0, 0, implicit $m0, implicit $exec + $vgpr0 = V_INTERP_P1_F32 killed $vgpr4, 0, 0, implicit $mode, implicit $m0, implicit $exec } S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir b/llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir index c88be5fdaba3b2..8d02f7a60add20 100644 --- a/llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir +++ b/llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir @@ -38,7 +38,7 @@ body: | liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 BUNDLE implicit-def $sgpr0_sgpr1 { $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 - $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec } S_ENDPGM 0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/hazard-kill.mir b/llvm/test/CodeGen/AMDGPU/hazard-kill.mir index 5f4b55132112f2..6602c079986e62 100644 --- a/llvm/test/CodeGen/AMDGPU/hazard-kill.mir +++ b/llvm/test/CodeGen/AMDGPU/hazard-kill.mir @@ -19,12 +19,12 @@ tracksRegLiveness: true body: | bb.0: liveins: $sgpr2, $sgpr3, $sgpr4 - + $sgpr6 = S_MOV_B32 killed $sgpr3 renamable $sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX4_IMM renamable $sgpr6_sgpr7, 16, 0, 0 $m0 = S_MOV_B32 killed renamable $sgpr4 dead renamable $sgpr0 = KILL undef renamable $sgpr2 - renamable $vgpr0 = V_INTERP_MOV_F32 2, 0, 0, implicit $m0, implicit $exec + renamable $vgpr0 = V_INTERP_MOV_F32 2, 0, 0, implicit $mode, implicit $m0, implicit $exec renamable $sgpr0 = S_MOV_B32 0 S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/hazard.mir b/llvm/test/CodeGen/AMDGPU/hazard.mir index bc62bd9ef087e4..1b53aac3646be6 100644 --- a/llvm/test/CodeGen/AMDGPU/hazard.mir +++ b/llvm/test/CodeGen/AMDGPU/hazard.mir @@ -27,7 +27,7 @@ body: | $m0 = S_MOV_B32 killed $sgpr7 $vgpr5 = IMPLICIT_DEF - $vgpr0 = V_INTERP_P1_F32 killed $vgpr4, 0, 0, implicit $m0, implicit $exec + $vgpr0 = V_INTERP_P1_F32 killed $vgpr4, 0, 0, implicit $mode, implicit $m0, implicit $exec SI_RETURN_TO_EPILOG killed $vgpr5, killed $vgpr0 ... @@ -56,7 +56,7 @@ body: | $m0 = S_MOV_B32 killed $sgpr7 INLINEASM &"; no-op", 1, 327690, def $vgpr5 - $vgpr0 = V_INTERP_P1_F32 killed $vgpr4, 0, 0, implicit $m0, implicit $exec + $vgpr0 = V_INTERP_P1_F32 killed $vgpr4, 0, 0, implicit $mode, implicit $m0, implicit $exec SI_RETURN_TO_EPILOG killed $vgpr5, killed $vgpr0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-callee.mir b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-callee.mir index b9f6c2f79db0f8..0ffed0ae4bfd59 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-callee.mir +++ b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-callee.mir @@ -19,7 +19,7 @@ liveins: name: entry_callee_wait body: | bb.0: - $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr0 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec S_SETPC_B64 killed $sgpr0_sgpr1 ... diff --git a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir index c8778c73aea380..a8c930d27c9bef 100644 --- a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir +++ b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir @@ -79,22 +79,22 @@ name: div_fmas body: | bb.0: $vcc = S_MOV_B64 0 - $vgpr0 = V_DIV_FMAS_F32 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $vcc, implicit $exec + $vgpr0 = V_DIV_FMAS_F32 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $mode, implicit $vcc, implicit $exec S_BRANCH %bb.1 bb.1: implicit $vcc = V_CMP_EQ_I32_e32 $vgpr1, $vgpr2, implicit $exec - $vgpr0 = V_DIV_FMAS_F32 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $vcc, implicit $exec + $vgpr0 = V_DIV_FMAS_F32 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $mode, implicit $vcc, implicit $exec S_BRANCH %bb.2 bb.2: $vcc = V_CMP_EQ_I32_e64 $vgpr1, $vgpr2, implicit $exec - $vgpr0 = V_DIV_FMAS_F32 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $vcc, implicit $exec + $vgpr0 = V_DIV_FMAS_F32 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $mode, implicit $vcc, implicit $exec S_BRANCH %bb.3 bb.3: - $vgpr4, $vcc = V_DIV_SCALE_F32 $vgpr1, $vgpr1, $vgpr3, implicit $exec - $vgpr0 = V_DIV_FMAS_F32 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $vcc, implicit $exec + $vgpr4, $vcc = V_DIV_SCALE_F32 $vgpr1, $vgpr1, $vgpr3, implicit $mode, implicit $exec + $vgpr0 = V_DIV_FMAS_F32 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $mode, implicit $vcc, 
implicit $exec S_ENDPGM 0 ... @@ -128,24 +128,24 @@ name: s_getreg body: | bb.0: - S_SETREG_B32 $sgpr0, 1 - $sgpr1 = S_GETREG_B32 1 + S_SETREG_B32 $sgpr0, 1, implicit-def $mode, implicit $mode + $sgpr1 = S_GETREG_B32 1, implicit-def $mode, implicit $mode S_BRANCH %bb.1 bb.1: - S_SETREG_IMM32_B32 0, 1 - $sgpr1 = S_GETREG_B32 1 + S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode + $sgpr1 = S_GETREG_B32 1, implicit-def $mode, implicit $mode S_BRANCH %bb.2 bb.2: - S_SETREG_B32 $sgpr0, 1 + S_SETREG_B32 $sgpr0, 1, implicit-def $mode, implicit $mode $sgpr1 = S_MOV_B32 0 - $sgpr2 = S_GETREG_B32 1 + $sgpr2 = S_GETREG_B32 1, implicit-def $mode, implicit $mode S_BRANCH %bb.3 bb.3: - S_SETREG_B32 $sgpr0, 0 - $sgpr1 = S_GETREG_B32 1 + S_SETREG_B32 $sgpr0, 0, implicit-def $mode, implicit $mode + $sgpr1 = S_GETREG_B32 1, implicit-def $mode, implicit $mode S_ENDPGM 0 ... @@ -173,18 +173,18 @@ name: s_setreg body: | bb.0: - S_SETREG_B32 $sgpr0, 1 - S_SETREG_B32 $sgpr1, 1 + S_SETREG_B32 $sgpr0, 1, implicit-def $mode, implicit $mode + S_SETREG_B32 $sgpr1, 1, implicit-def $mode, implicit $mode S_BRANCH %bb.1 bb.1: - S_SETREG_B32 $sgpr0, 64 - S_SETREG_B32 $sgpr1, 128 + S_SETREG_B32 $sgpr0, 64, implicit-def $mode, implicit $mode + S_SETREG_B32 $sgpr1, 128, implicit-def $mode, implicit $mode S_BRANCH %bb.2 bb.2: - S_SETREG_B32 $sgpr0, 1 - S_SETREG_B32 $sgpr1, 0 + S_SETREG_B32 $sgpr0, 1, implicit-def $mode, implicit $mode + S_SETREG_B32 $sgpr1, 0, implicit-def $mode, implicit $mode S_ENDPGM 0 ... 
@@ -342,12 +342,12 @@ name: rfe body: | bb.0: - S_SETREG_B32 $sgpr0, 3 + S_SETREG_B32 $sgpr0, 3, implicit-def $mode, implicit $mode S_RFE_B64 $sgpr2_sgpr3 S_BRANCH %bb.1 bb.1: - S_SETREG_B32 $sgpr0, 0 + S_SETREG_B32 $sgpr0, 0, implicit-def $mode, implicit $mode S_RFE_B64 $sgpr2_sgpr3 S_ENDPGM 0 @@ -461,22 +461,22 @@ name: v_interp body: | bb.0: $m0 = S_MOV_B32 0 - $vgpr0 = V_INTERP_P1_F32 $vgpr0, 0, 0, implicit $m0, implicit $exec + $vgpr0 = V_INTERP_P1_F32 $vgpr0, 0, 0, implicit $mode, implicit $m0, implicit $exec S_BRANCH %bb.1 bb.1: $m0 = S_MOV_B32 0 - $vgpr0 = V_INTERP_P2_F32 $vgpr0, $vgpr1, 0, 0, implicit $m0, implicit $exec + $vgpr0 = V_INTERP_P2_F32 $vgpr0, $vgpr1, 0, 0, implicit $mode, implicit $m0, implicit $exec S_BRANCH %bb.2 bb.2: $m0 = S_MOV_B32 0 - $vgpr0 = V_INTERP_P1_F32_16bank $vgpr0, 0, 0, implicit $m0, implicit $exec + $vgpr0 = V_INTERP_P1_F32_16bank $vgpr0, 0, 0, implicit $mode, implicit $m0, implicit $exec S_BRANCH %bb.3 bb.3: $m0 = S_MOV_B32 0 - $vgpr0 = V_INTERP_MOV_F32 0, 0, 0, implicit $m0, implicit $exec + $vgpr0 = V_INTERP_MOV_F32 0, 0, 0, implicit $mode, implicit $m0, implicit $exec S_ENDPGM 0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/madak-inline-constant.mir b/llvm/test/CodeGen/AMDGPU/madak-inline-constant.mir index 473c69e3839509..935e91a3a864bc 100644 --- a/llvm/test/CodeGen/AMDGPU/madak-inline-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/madak-inline-constant.mir @@ -4,7 +4,7 @@ # GCN-LABEL: bb.0: # GCN: V_MOV_B32_e32 1092616192 # GCN: S_MOV_B32 1082130432 -# GCN: %3:vgpr_32 = V_MADAK_F32 1082130432, killed %0, 1092616192, implicit $exec +# GCN: %3:vgpr_32 = V_MADAK_F32 1082130432, killed %0, 1092616192, implicit $mode, implicit $exec --- name: test src1-inlined @@ -15,7 +15,7 @@ body: | %0:vgpr_32 = COPY $vgpr0 %17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec %18:sreg_32 = S_MOV_B32 1082130432 - %19:vgpr_32 = V_MAC_F32_e64 0, killed %0, 0, killed %18, 0, %17, 0, 0, implicit $exec + %19:vgpr_32 = V_MAC_F32_e64 0, killed %0, 0, killed %18, 0, %17, 0, 0, implicit $mode, implicit $exec ... @@ -23,7 +23,7 @@ body: | # GCN-LABEL: bb.0: # GCN: V_MOV_B32_e32 1092616192 # GCN: S_MOV_B32 1082130432 -# GCN: %3:vgpr_32 = V_MADAK_F32 1082130432, killed %0, 1092616192, implicit $exec +# GCN: %3:vgpr_32 = V_MADAK_F32 1082130432, killed %0, 1092616192, implicit $mode, implicit $exec --- name: test src0-inlined @@ -34,14 +34,14 @@ body: | %0:vgpr_32 = COPY $vgpr0 %17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec %18:sreg_32 = S_MOV_B32 1082130432 - %19:vgpr_32 = V_MAC_F32_e64 0, killed %18, 0, killed %0, 0, %17, 0, 0, implicit $exec + %19:vgpr_32 = V_MAC_F32_e64 0, killed %18, 0, killed %0, 0, %17, 0, 0, implicit $mode, implicit $exec ... 
# GCN-LABEL: bb.0: # GCN: V_MOV_B32_e32 1092616192 # GCN: S_MOV_B32 1082130432 -# GCN: %3:vgpr_32 = V_MADAK_F32 killed %0, killed %0, 1092616192, implicit $exec +# GCN: %3:vgpr_32 = V_MADAK_F32 killed %0, killed %0, 1092616192, implicit $mode, implicit $exec --- name: test none-inlined @@ -52,14 +52,14 @@ body: | %0:vgpr_32 = COPY $vgpr0 %17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec %18:sreg_32 = S_MOV_B32 1082130432 - %19:vgpr_32 = V_MAC_F32_e64 0, killed %0, 0, killed %0, 0, %17, 0, 0, implicit $exec + %19:vgpr_32 = V_MAC_F32_e64 0, killed %0, 0, killed %0, 0, %17, 0, 0, implicit $mode, implicit $exec ... # GCN-LABEL: bb.0: # GCN: V_MOV_B32_e32 1092616192 # GCN: V_MOV_B32_e32 1082130432 -# GCN: %3:vgpr_32 = V_MADAK_F32 1082130432, killed %0, 1092616192, implicit $exec +# GCN: %3:vgpr_32 = V_MADAK_F32 1082130432, killed %0, 1092616192, implicit $mode, implicit $exec --- name: test src1-2vgprs-inlined @@ -70,7 +70,7 @@ body: | %0:vgpr_32 = COPY $vgpr0 %17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec %18:vgpr_32 = V_MOV_B32_e32 1082130432, implicit $exec - %19:vgpr_32 = V_MAC_F32_e64 0, killed %0, 0, killed %18, 0, %17, 0, 0, implicit $exec + %19:vgpr_32 = V_MAC_F32_e64 0, killed %0, 0, killed %18, 0, %17, 0, 0, implicit $mode, implicit $exec ... @@ -78,7 +78,7 @@ body: | # GCN-LABEL: bb.0: # GCN: V_MOV_B32_e32 1092616192 # GCN: V_MOV_B32_e32 1082130432 -# GCN: %3:vgpr_32 = V_MADAK_F32 1082130432, killed %0, 1092616192, implicit $exec +# GCN: %3:vgpr_32 = V_MADAK_F32 1082130432, killed %0, 1092616192, implicit $mode, implicit $exec --- name: test src0-2vgprs-inlined @@ -89,14 +89,14 @@ body: | %0:vgpr_32 = COPY $vgpr0 %17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec %18:vgpr_32 = V_MOV_B32_e32 1082130432, implicit $exec - %19:vgpr_32 = V_MAC_F32_e64 0, killed %18, 0, killed %0, 0, %17, 0, 0, implicit $exec + %19:vgpr_32 = V_MAC_F32_e64 0, killed %18, 0, killed %0, 0, %17, 0, 0, implicit $mode, implicit $exec ... 
# GCN-LABEL: bb.0: # GCN: V_MOV_B32_e32 1092616192, implicit $exec # GCN: S_MOV_B32 1082130432 -# GCN: V_MADAK_F32 1082130432, killed $vgpr1, 1092616192, implicit $exec +# GCN: V_MADAK_F32 1082130432, killed $vgpr1, 1092616192, implicit $mode, implicit $exec --- name: test src0-phys-vgpr @@ -108,14 +108,14 @@ body: | $vgpr1 = COPY $vgpr0 %17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec %18:sgpr_32 = S_MOV_B32 1082130432 - %19:vgpr_32 = V_MAC_F32_e64 0, killed $vgpr1, 0, killed %18, 0, %17, 0, 0, implicit $exec + %19:vgpr_32 = V_MAC_F32_e64 0, killed $vgpr1, 0, killed %18, 0, %17, 0, 0, implicit $mode, implicit $exec ... # GCN-LABEL: bb.0: # GCN: V_MOV_B32_e32 1092616192, implicit $exec # GCN: S_MOV_B32 1082130432 -# GCN: V_MADAK_F32 1082130432, killed $vgpr0, 1092616192, implicit $exec +# GCN: V_MADAK_F32 1082130432, killed $vgpr0, 1092616192, implicit $mode, implicit $exec --- name: test src1-phys-vgpr @@ -127,13 +127,13 @@ body: | %0:vgpr_32 = COPY $vgpr0 %17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec %18:sgpr_32 = S_MOV_B32 1082130432 - %19:vgpr_32 = V_MAC_F32_e64 0, killed %18, 0, killed $vgpr0, 0, %17, 0, 0, implicit $exec + %19:vgpr_32 = V_MAC_F32_e64 0, killed %18, 0, killed $vgpr0, 0, %17, 0, 0, implicit $mode, implicit $exec ... # GCN-LABEL: bb.0: # GCN: V_MOV_B32_e32 1092616192, implicit $exec -# GCN: V_MAC_F32_e64 0, killed $sgpr2, 0, killed %0, 0, %1, 0, 0, implicit $exec +# GCN: V_MAC_F32_e64 0, killed $sgpr2, 0, killed %0, 0, %1, 0, 0, implicit $mode, implicit $exec --- name: test src0-phys-sgpr @@ -144,13 +144,13 @@ body: | %0:vgpr_32 = COPY $vgpr0 %17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec - %19:vgpr_32 = V_MAC_F32_e64 0, killed $sgpr2, 0, killed %0, 0, %17, 0, 0, implicit $exec + %19:vgpr_32 = V_MAC_F32_e64 0, killed $sgpr2, 0, killed %0, 0, %17, 0, 0, implicit $mode, implicit $exec ... 
# GCN-LABEL: bb.0: # GCN: V_MOV_B32_e32 1092616192, implicit $exec -# GCN: V_MAC_F32_e64 0, killed %0, 0, killed $sgpr2, 0, %1, 0, 0, implicit $exec +# GCN: V_MAC_F32_e64 0, killed %0, 0, killed $sgpr2, 0, %1, 0, 0, implicit $mode, implicit $exec --- name: test src1-phys-sgpr @@ -161,14 +161,14 @@ body: | %0:vgpr_32 = COPY $vgpr0 %17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec - %19:vgpr_32 = V_MAC_F32_e64 0, killed %0, 0, killed $sgpr2, 0, %17, 0, 0, implicit $exec + %19:vgpr_32 = V_MAC_F32_e64 0, killed %0, 0, killed $sgpr2, 0, %17, 0, 0, implicit $mode, implicit $exec ... # GCN-LABEL: bb.0: # GCN: V_MOV_B32_e32 1092616192, implicit $exec # GCN: $sgpr2 = S_MOV_B32 1082130432 -# GCN: V_MADAK_F32 1082130432, killed %0, 1092616192, implicit $exec +# GCN: V_MADAK_F32 1082130432, killed %0, 1092616192, implicit $mode, implicit $exec --- name: test src1-phys-sgpr-move @@ -180,6 +180,6 @@ body: | %0:vgpr_32 = COPY $vgpr0 %17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec $sgpr2 = S_MOV_B32 1082130432 - %19:vgpr_32 = V_MAC_F32_e64 0, killed %0, 0, killed $sgpr2, 0, %17, 0, 0, implicit $exec + %19:vgpr_32 = V_MAC_F32_e64 0, killed %0, 0, killed $sgpr2, 0, %17, 0, 0, implicit $mode, implicit $exec ... diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards.mir index 9f49f9fd585264..59ce256dc01274 100644 --- a/llvm/test/CodeGen/AMDGPU/mai-hazards.mir +++ b/llvm/test/CodeGen/AMDGPU/mai-hazards.mir @@ -11,7 +11,7 @@ body: | bb.0: $vgpr0 = V_MOV_B32_e32 1, implicit $exec $vgpr1 = V_MOV_B32_e32 1, implicit $exec - $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec ... 
--- @@ -34,8 +34,8 @@ body: | name: mfma_write_agpr_mfma_read_same_agpr body: | bb.0: - $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec - $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec ... --- @@ -47,8 +47,8 @@ body: | name: mfma_write_agpr_mfma_read_overlap body: | bb.0: - $agpr1_agpr2_agpr3_agpr4 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec - $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec + $agpr1_agpr2_agpr3_agpr4 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec ... 
--- @@ -60,8 +60,8 @@ body: | name: mfma_write_agpr_mfma_read_partial body: | bb.0: - $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec - $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec ... --- @@ -75,8 +75,8 @@ body: | name: mfma_write_agpr_mfma_srca_read_overlap body: | bb.0: - $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec - $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $agpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $agpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec ... 
--- @@ -90,8 +90,8 @@ body: | name: mfma_write_agpr_mfma_srcb_read_overlap body: | bb.0: - $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec - $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $agpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $agpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec ... --- @@ -105,7 +105,7 @@ body: | name: mfma_4x4_write_agpr_accvgpr_read body: | bb.0: - $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec ... --- @@ -126,7 +126,7 @@ body: | name: mfma_16x16_write_agpr_accvgpr_read body: | bb.0: - $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec ... 
--- @@ -155,7 +155,7 @@ body: | name: mfma_32x32_write_agpr_accvgpr_read body: | bb.0: - $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec ... --- @@ -167,7 +167,7 @@ body: | name: mfma_4x4_write_agpr_accvgpr_write body: | bb.0: - $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec ... --- @@ -185,7 +185,7 @@ body: | name: mfma_16x16_write_agpr_accvgpr_write body: | bb.0: - $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec ... 
--- @@ -211,7 +211,7 @@ body: | name: mfma_32x32_write_agpr_accvgpr_write body: | bb.0: - $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec ... --- @@ -222,7 +222,7 @@ body: | name: mfma_4x4_read_srcc_accvgpr_write body: | bb.0: - $agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec + $agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec ... --- @@ -238,7 +238,7 @@ body: | name: mfma_16x16_read_srcc_accvgpr_write body: | bb.0: - $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_16X16X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec + $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_16X16X1F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec ... 
--- @@ -262,7 +262,7 @@ body: | name: mfma_32x32_read_srcc_accvgpr_write body: | bb.0: - $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X2F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $exec + $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X2F32 killed $vgpr1, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec ... --- @@ -274,7 +274,7 @@ name: accvgpr_read_write_vgpr_valu_read body: | bb.0: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr4, implicit $exec - $vgpr1 = V_ADD_F32_e32 0, killed $vgpr0, implicit $exec + $vgpr1 = V_ADD_F32_e32 0, killed $vgpr0, implicit $mode, implicit $exec ... --- @@ -287,7 +287,7 @@ name: accvgpr_read_write_vgpr_mfma_read body: | bb.0: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr4, implicit $exec - $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr0, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec + $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32 killed $vgpr0, killed $vgpr0, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec ... --- @@ -312,7 +312,7 @@ name: accvgpr_write_agpr_mfma_read_srcc body: | bb.0: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec - $agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr2, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec + $agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $vgpr2, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec ... 
--- @@ -326,7 +326,7 @@ name: accvgpr_write_agpr_mfma_read_srca body: | bb.0: $agpr8 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec - $agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $agpr8, killed $vgpr1, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec + $agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $agpr8, killed $vgpr1, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec ... --- @@ -340,7 +340,7 @@ name: accvgpr_write_agpr_mfma_read_srcb body: | bb.0: $agpr8 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec - $agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $agpr8, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec + $agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $vgpr1, killed $agpr8, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec ... --- @@ -369,7 +369,7 @@ name: vcmpx_write_exec_mfma body: | bb.0: implicit $exec, implicit $vcc = V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec - $agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $agpr8, killed $vgpr1, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $exec + $agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32 killed $agpr8, killed $vgpr1, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec ... --- diff --git a/llvm/test/CodeGen/AMDGPU/merge-m0.mir b/llvm/test/CodeGen/AMDGPU/merge-m0.mir index 9c6ff0b0a628f0..0afc5d1cb1a1ef 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-m0.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-m0.mir @@ -291,7 +291,7 @@ body: | bb.0: %0 = IMPLICIT_DEF %1 = IMPLICIT_DEF - S_SETREG_IMM32_B32 0, 1 + S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode SI_INIT_M0 -1, implicit-def $m0 DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec ... 
diff --git a/llvm/test/CodeGen/AMDGPU/mode-register.mir b/llvm/test/CodeGen/AMDGPU/mode-register.mir index a6324410b48883..753e6a3ce0a751 100644 --- a/llvm/test/CodeGen/AMDGPU/mode-register.mir +++ b/llvm/test/CodeGen/AMDGPU/mode-register.mir @@ -17,12 +17,12 @@ body: | liveins: $sgpr0, $sgpr1, $sgpr2 $m0 = S_MOV_B32 killed $sgpr2 $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - $vgpr1 = V_INTERP_P1LL_F16 0, $vgpr0, 2, 1, 0, 0, 0, implicit $m0, implicit $exec - $vgpr2 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $exec - $vgpr0 = V_INTERP_P1LL_F16 0, killed $vgpr0, 2, 1, -1, 0, 0, implicit $m0, implicit $exec - $vgpr1 = V_INTERP_P2_F16 0, $vgpr2, 2, 1, 0, killed $vgpr1, 0, 0, implicit $m0, implicit $exec - $vgpr0 = V_INTERP_P2_F16 0, killed $vgpr2, 2, 1, 0, killed $vgpr0, -1, 0, implicit $m0, implicit $exec - $vgpr0 = V_ADD_F16_e32 killed $vgpr1, killed $vgpr0, implicit $exec + $vgpr1 = V_INTERP_P1LL_F16 0, $vgpr0, 2, 1, 0, 0, 0, implicit $mode, implicit $m0, implicit $exec + $vgpr2 = V_MOV_B32_e32 killed $sgpr1, implicit $exec + $vgpr0 = V_INTERP_P1LL_F16 0, killed $vgpr0, 2, 1, -1, 0, 0, implicit $mode, implicit $m0, implicit $exec + $vgpr1 = V_INTERP_P2_F16 0, $vgpr2, 2, 1, 0, killed $vgpr1, 0, 0, implicit $mode, implicit $m0, implicit $exec + $vgpr0 = V_INTERP_P2_F16 0, killed $vgpr2, 2, 1, 0, killed $vgpr0, -1, 0, implicit $mode, implicit $m0, implicit $exec + $vgpr0 = V_ADD_F16_e32 killed $vgpr1, killed $vgpr0, implicit $mode, implicit $exec S_ENDPGM 0 ... 
--- @@ -41,14 +41,14 @@ body: | bb.0: liveins: $sgpr0, $sgpr1, $sgpr2 $m0 = S_MOV_B32 killed $sgpr2 - S_SETREG_IMM32_B32 3, 2177 + S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - $vgpr1 = V_INTERP_P1LL_F16 0, $vgpr0, 2, 1, 0, 0, 0, implicit $m0, implicit $exec - $vgpr2 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $exec - $vgpr0 = V_INTERP_P1LL_F16 0, killed $vgpr0, 2, 1, -1, 0, 0, implicit $m0, implicit $exec - $vgpr1 = V_INTERP_P2_F16 0, $vgpr2, 2, 1, 0, killed $vgpr1, 0, 0, implicit $m0, implicit $exec - $vgpr0 = V_INTERP_P2_F16 0, killed $vgpr2, 2, 1, 0, killed $vgpr0, -1, 0, implicit $m0, implicit $exec - $vgpr0 = V_ADD_F16_e32 killed $vgpr1, killed $vgpr0, implicit $exec + $vgpr1 = V_INTERP_P1LL_F16 0, $vgpr0, 2, 1, 0, 0, 0, implicit $mode, implicit $m0, implicit $exec + $vgpr2 = V_MOV_B32_e32 killed $sgpr1, implicit $exec + $vgpr0 = V_INTERP_P1LL_F16 0, killed $vgpr0, 2, 1, -1, 0, 0, implicit $mode, implicit $m0, implicit $exec + $vgpr1 = V_INTERP_P2_F16 0, $vgpr2, 2, 1, 0, killed $vgpr1, 0, 0, implicit $mode, implicit $m0, implicit $exec + $vgpr0 = V_INTERP_P2_F16 0, killed $vgpr2, 2, 1, 0, killed $vgpr0, -1, 0, implicit $mode, implicit $m0, implicit $exec + $vgpr0 = V_ADD_F16_e32 killed $vgpr1, killed $vgpr0, implicit $mode, implicit $exec S_ENDPGM 0 ... 
--- @@ -68,13 +68,13 @@ body: | liveins: $sgpr0, $sgpr1, $sgpr2 $m0 = S_MOV_B32 killed $sgpr2 $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec - $vgpr1 = V_INTERP_P1LL_F16 0, $vgpr0, 2, 1, 0, 0, 0, implicit $m0, implicit $exec - $vgpr2 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $exec - $vgpr0 = V_INTERP_P1LL_F16 0, killed $vgpr0, 2, 1, -1, 0, 0, implicit $m0, implicit $exec - $vgpr1 = V_INTERP_P2_F16 0, $vgpr2, 2, 1, 0, killed $vgpr1, 0, 0, implicit $m0, implicit $exec - $vgpr0 = V_INTERP_P2_F16 0, killed $vgpr2, 2, 1, 0, killed $vgpr0, -1, 0, implicit $m0, implicit $exec - S_SETREG_IMM32_B32 0, 2177 - $vgpr0 = V_ADD_F16_e32 killed $vgpr1, killed $vgpr0, implicit $exec + $vgpr1 = V_INTERP_P1LL_F16 0, $vgpr0, 2, 1, 0, 0, 0, implicit $mode, implicit $m0, implicit $exec + $vgpr2 = V_MOV_B32_e32 killed $sgpr1, implicit $exec + $vgpr0 = V_INTERP_P1LL_F16 0, killed $vgpr0, 2, 1, -1, 0, 0, implicit $mode, implicit $m0, implicit $exec + $vgpr1 = V_INTERP_P2_F16 0, $vgpr2, 2, 1, 0, killed $vgpr1, 0, 0, implicit $mode, implicit $m0, implicit $exec + $vgpr0 = V_INTERP_P2_F16 0, killed $vgpr2, 2, 1, 0, killed $vgpr0, -1, 0, implicit $mode, implicit $m0, implicit $exec + S_SETREG_IMM32_B32 0, 2177, implicit-def $mode, implicit $mode + $vgpr0 = V_ADD_F16_e32 killed $vgpr1, killed $vgpr0, implicit $mode, implicit $exec S_ENDPGM 0 ... --- @@ -89,7 +89,7 @@ name: rtn_default body: | bb.0: liveins: $vgpr1_vgpr2 - $vgpr1_vgpr2 = V_FRACT_F64_e32 killed $vgpr1_vgpr2, implicit $exec + $vgpr1_vgpr2 = V_FRACT_F64_e32 killed $vgpr1_vgpr2, implicit $mode, implicit $exec S_ENDPGM 0 ... --- @@ -106,8 +106,8 @@ name: rtn_from_rtz body: | bb.0: liveins: $vgpr1_vgpr2 - S_SETREG_IMM32_B32 3, 2177 - $vgpr1_vgpr2 = V_FRACT_F64_e32 killed $vgpr1_vgpr2, implicit $exec + S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode + $vgpr1_vgpr2 = V_FRACT_F64_e32 killed $vgpr1_vgpr2, implicit $mode, implicit $exec S_ENDPGM 0 ... 
--- @@ -122,11 +122,11 @@ body: | bb.0: successors: %bb.1 liveins: $vgpr1_vgpr2 - $vgpr1_vgpr2 = V_FRACT_F64_e32 killed $vgpr1_vgpr2, implicit $exec + $vgpr1_vgpr2 = V_FRACT_F64_e32 killed $vgpr1_vgpr2, implicit $mode, implicit $exec S_BRANCH %bb.1 bb.1: - $vgpr1 = V_INTERP_P1LL_F16 0, $vgpr0, 2, 1, 0, 0, 0, implicit $m0, implicit $exec + $vgpr1 = V_INTERP_P1LL_F16 0, $vgpr0, 2, 1, 0, 0, 0, implicit $mode, implicit $m0, implicit $exec S_ENDPGM 0 ... --- @@ -150,13 +150,13 @@ body: | liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr3, $vgpr4 $m0 = S_MOV_B32 killed $sgpr2 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $exec - $vgpr1 = V_INTERP_P1LL_F16 0, $vgpr0, 2, 1, 0, 0, 0, implicit $m0, implicit $exec + $vgpr1 = V_INTERP_P1LL_F16 0, $vgpr0, 2, 1, 0, 0, 0, implicit $mode, implicit $m0, implicit $exec $vgpr2 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec - $vgpr0 = V_INTERP_P1LL_F16 0, killed $vgpr0, 2, 1, -1, 0, 0, implicit $m0, implicit $exec - $vgpr1 = V_INTERP_P2_F16 0, $vgpr2, 2, 1, 0, killed $vgpr1, 0, 0, implicit $m0, implicit $exec - $vgpr3_vgpr4 = V_FRACT_F64_e32 killed $vgpr3_vgpr4, implicit $exec - $vgpr0 = V_INTERP_P2_F16 0, killed $vgpr2, 2, 1, 0, killed $vgpr0, -1, 0, implicit $m0, implicit $exec - $vgpr0 = V_ADD_F16_e32 killed $sgpr0, killed $vgpr0, implicit $exec + $vgpr0 = V_INTERP_P1LL_F16 0, killed $vgpr0, 2, 1, -1, 0, 0, implicit $mode, implicit $m0, implicit $exec + $vgpr1 = V_INTERP_P2_F16 0, $vgpr2, 2, 1, 0, killed $vgpr1, 0, 0, implicit $mode, implicit $m0, implicit $exec + $vgpr3_vgpr4 = V_FRACT_F64_e32 killed $vgpr3_vgpr4, implicit $mode, implicit $exec + $vgpr0 = V_INTERP_P2_F16 0, killed $vgpr2, 2, 1, 0, killed $vgpr0, -1, 0, implicit $mode, implicit $m0, implicit $exec + $vgpr0 = V_ADD_F16_e32 killed $sgpr0, killed $vgpr0, implicit $mode, implicit $exec S_ENDPGM 0 ... 
--- @@ -179,14 +179,14 @@ body: | liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr3, $vgpr4 $m0 = S_MOV_B32 killed $sgpr2 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $exec - $vgpr1 = V_INTERP_P1LL_F16 0, $vgpr0, 2, 1, 0, 0, 0, implicit $m0, implicit $exec - S_SETREG_IMM32_B32 2, 2049 - $vgpr2 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $exec - $vgpr0 = V_INTERP_P1LL_F16 0, killed $vgpr0, 2, 1, -1, 0, 0, implicit $m0, implicit $exec - $vgpr1 = V_INTERP_P2_F16 0, $vgpr2, 2, 1, 0, killed $vgpr1, 0, 0, implicit $m0, implicit $exec - $vgpr3_vgpr4 = V_FRACT_F64_e32 killed $vgpr3_vgpr4, implicit $exec - $vgpr0 = V_INTERP_P2_F16 0, killed $vgpr2, 2, 1, 0, killed $vgpr0, -1, 0, implicit $m0, implicit $exec - $vgpr0 = V_ADD_F16_e32 killed $sgpr0, killed $vgpr0, implicit $exec + $vgpr1 = V_INTERP_P1LL_F16 0, $vgpr0, 2, 1, 0, 0, 0, implicit $mode, implicit $m0, implicit $exec + S_SETREG_IMM32_B32 2, 2049, implicit-def $mode, implicit $mode + $vgpr2 = V_MOV_B32_e32 $sgpr1, implicit $exec + $vgpr0 = V_INTERP_P1LL_F16 0, killed $vgpr0, 2, 1, -1, 0, 0, implicit $mode, implicit $m0, implicit $exec + $vgpr1 = V_INTERP_P2_F16 0, $vgpr2, 2, 1, 0, killed $vgpr1, 0, 0, implicit $mode, implicit $m0, implicit $exec + $vgpr3_vgpr4 = V_FRACT_F64_e32 killed $vgpr3_vgpr4, implicit $mode, implicit $exec + $vgpr0 = V_INTERP_P2_F16 0, killed $vgpr2, 2, 1, 0, killed $vgpr0, -1, 0, implicit $mode, implicit $m0, implicit $exec + $vgpr0 = V_ADD_F16_e32 killed $sgpr0, killed $vgpr0, implicit $mode, implicit $exec S_ENDPGM 0 ... 
--- @@ -212,13 +212,13 @@ body: | bb.1: successors: %bb.2 - $vgpr3_vgpr4 = V_FRACT_F64_e32 killed $vgpr3_vgpr4, implicit $exec + $vgpr3_vgpr4 = V_FRACT_F64_e32 killed $vgpr3_vgpr4, implicit $mode, implicit $exec S_BRANCH %bb.2 bb.2: successors: %bb.1, %bb.3 - $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $exec - $vgpr1 = V_INTERP_P1LL_F16 0, $vgpr0, 2, 1, 0, 0, 0, implicit $m0, implicit $exec + $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec + $vgpr1 = V_INTERP_P1LL_F16 0, $vgpr0, 2, 1, 0, 0, 0, implicit $mode, implicit $m0, implicit $exec S_CBRANCH_VCCZ %bb.1, implicit $vcc S_BRANCH %bb.3 @@ -251,7 +251,7 @@ body: | bb.2: successors: %bb.1, %bb.3 - $vgpr3_vgpr4 = V_FRACT_F64_e32 killed $vgpr3_vgpr4, implicit $exec + $vgpr3_vgpr4 = V_FRACT_F64_e32 killed $vgpr3_vgpr4, implicit $mode, implicit $exec S_CBRANCH_VCCZ %bb.1, implicit $vcc S_BRANCH %bb.3 @@ -267,7 +267,7 @@ body: | bb.5: successors: %bb.1, %bb.6 - S_SETREG_IMM32_B32 3, 2177 + S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode S_CBRANCH_VCCZ %bb.1, implicit $vcc S_BRANCH %bb.6 @@ -306,7 +306,7 @@ body: | bb.3: successors: %bb.1, %bb.4 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $exec - $vgpr1 = V_INTERP_P1LL_F16 0, $vgpr0, 2, 1, 0, 0, 0, implicit $m0, implicit $exec + $vgpr1 = V_INTERP_P1LL_F16 0, $vgpr0, 2, 1, 0, 0, 0, implicit $mode, implicit $m0, implicit $exec S_CBRANCH_VCCZ %bb.1, implicit $vcc S_BRANCH %bb.4 @@ -337,12 +337,12 @@ body: | bb.2: successors: %bb.3 - S_SETREG_IMM32_B32 3, 2177 + S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode S_BRANCH %bb.3 bb.3: successors: %bb.4 - $vgpr3_vgpr4 = V_FRACT_F64_e32 killed $vgpr3_vgpr4, implicit $exec + $vgpr3_vgpr4 = V_FRACT_F64_e32 killed $vgpr3_vgpr4, implicit $mode, implicit $exec S_BRANCH %bb.4 bb.4: @@ -373,7 +373,7 @@ body: | bb.2: successors: %bb.3 - S_SETREG_IMM32_B32 3, 2177 + S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode S_BRANCH %bb.3 bb.3: @@ -383,7 +383,7 @@ body: | bb.4: 
successors: %bb.5 - $vgpr3_vgpr4 = V_FRACT_F64_e32 killed $vgpr3_vgpr4, implicit $exec + $vgpr3_vgpr4 = V_FRACT_F64_e32 killed $vgpr3_vgpr4, implicit $mode, implicit $exec S_BRANCH %bb.5 bb.5: @@ -402,8 +402,8 @@ body: | bb.0: successors: %bb.1 liveins: $vgpr1_vgpr2 - $vgpr1_vgpr2 = V_FRACT_F64_e32 killed $vgpr1_vgpr2, implicit $exec - $vgpr1 = V_INTERP_P1LL_F16 0, $vgpr0, 2, 1, 0, 0, 0, implicit $m0, implicit $exec + $vgpr1_vgpr2 = V_FRACT_F64_e32 killed $vgpr1_vgpr2, implicit $mode, implicit $exec + $vgpr1 = V_INTERP_P1LL_F16 0, $vgpr0, 2, 1, 0, 0, 0, implicit $mode, implicit $m0, implicit $exec S_BRANCH %bb.1 bb.1: @@ -419,7 +419,7 @@ body: | S_BRANCH %bb.4 bb.4: - $vgpr1 = V_INTERP_P1LL_F16 0, $vgpr0, 2, 1, 0, 0, 0, implicit $m0, implicit $exec + $vgpr1 = V_INTERP_P1LL_F16 0, $vgpr0, 2, 1, 0, 0, 0, implicit $mode, implicit $m0, implicit $exec S_ENDPGM 0 ... --- @@ -446,12 +446,12 @@ body: | bb.2: successors: %bb.3 - S_SETREG_IMM32_B32 3, 2177 + S_SETREG_IMM32_B32 3, 2177, implicit-def $mode, implicit $mode S_BRANCH %bb.3 bb.3: successors: %bb.4 - $vgpr3_vgpr4 = V_FRACT_F64_e32 killed $vgpr3_vgpr4, implicit $exec + $vgpr3_vgpr4 = V_FRACT_F64_e32 killed $vgpr3_vgpr4, implicit $mode, implicit $exec S_BRANCH %bb.4 bb.4: diff --git a/llvm/test/CodeGen/AMDGPU/movrels-bug.mir b/llvm/test/CodeGen/AMDGPU/movrels-bug.mir index c5575b2b7387ed..6d2b9ab4422aaa 100644 --- a/llvm/test/CodeGen/AMDGPU/movrels-bug.mir +++ b/llvm/test/CodeGen/AMDGPU/movrels-bug.mir @@ -24,7 +24,7 @@ body: | V_MOVRELD_B32_e32 undef $vgpr2, 0, implicit $m0, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8, implicit undef $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8(tied-def 4) $m0 = S_MOV_B32 undef $sgpr0 $vgpr1 = V_MOVRELS_B32_e32 undef $vgpr1, implicit $m0, implicit $exec, implicit killed $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 - $vgpr4 = V_MAC_F32_e32 undef $vgpr0, undef $vgpr0, undef $vgpr4, implicit $exec + $vgpr4 = nofpexcept V_MAC_F32_e32 undef 
$vgpr0, undef $vgpr0, undef $vgpr4, implicit $mode, implicit $exec EXP_DONE 15, undef $vgpr0, killed $vgpr1, killed $vgpr4, undef $vgpr0, 0, 0, 12, implicit $exec S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll new file mode 100644 index 00000000000000..060d66ae842820 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll @@ -0,0 +1,271 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,DEFAULTSIZE %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-assume-dynamic-stack-object-size=1024 < %s | FileCheck -check-prefixes=GCN,ASSUME1024 %s + +; FIXME: Generated test checks do not check metadata at the end of the +; function, so this also includes manually added checks. + +; Test that we can select a statically sized alloca outside of the +; entry block. + +; FIXME: FunctionLoweringInfo unhelpfully doesn't preserve an +; alignment less than the stack alignment. 
+define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reached_align4(i32 addrspace(1)* %out, i32 %arg.cond0, i32 %arg.cond1, i32 %in) { +; GCN-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align4: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_add_u32 flat_scratch_lo, s6, s9 +; GCN-NEXT: s_addc_u32 flat_scratch_hi, s7, 0 +; GCN-NEXT: s_add_u32 s0, s0, s9 +; GCN-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x8 +; GCN-NEXT: s_addc_u32 s1, s1, 0 +; GCN-NEXT: s_mov_b32 s33, 0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_cmp_lg_u32 s8, 0 +; GCN-NEXT: s_cbranch_scc1 BB0_3 +; GCN-NEXT: ; %bb.1: ; %bb.0 +; GCN-NEXT: s_cmp_lg_u32 s9, 0 +; GCN-NEXT: s_cbranch_scc1 BB0_3 +; GCN-NEXT: ; %bb.2: ; %bb.1 +; GCN-NEXT: s_add_i32 s6, s32, 0x1000 +; GCN-NEXT: s_lshl_b32 s7, s10, 2 +; GCN-NEXT: s_mov_b32 s32, s6 +; GCN-NEXT: v_mov_b32_e32 v2, s6 +; GCN-NEXT: v_mov_b32_e32 v1, 0 +; GCN-NEXT: s_add_i32 s6, s6, s7 +; GCN-NEXT: v_mov_b32_e32 v3, 1 +; GCN-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v3, v2, s[0:3], 0 offen offset:4 +; GCN-NEXT: v_mov_b32_e32 v1, s6 +; GCN-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen +; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_add_u32_e32 v2, v1, v0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_mov_b32_e32 v1, s5 +; GCN-NEXT: global_store_dword v[0:1], v2, off +; GCN-NEXT: BB0_3: ; %bb.2 +; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: global_store_dword v[0:1], v0, off +; GCN-NEXT: s_endpgm + +entry: + %cond0 = icmp eq i32 %arg.cond0, 0 + br i1 %cond0, label %bb.0, label %bb.2 + +bb.0: + %alloca = alloca [16 x i32], align 4, addrspace(5) + %gep0 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 0 + %gep1 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 1 + %cond1 = icmp eq i32 %arg.cond1, 0 + br i1 %cond1, label %bb.1, label %bb.2 + +bb.1: + ; Use the alloca 
outside of the defining block. + store i32 0, i32 addrspace(5)* %gep0 + store i32 1, i32 addrspace(5)* %gep1 + %gep2 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %in + %load = load i32, i32 addrspace(5)* %gep2 + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %add = add i32 %load, %tid + store i32 %add, i32 addrspace(1)* %out + br label %bb.2 + +bb.2: + store volatile i32 0, i32 addrspace(1)* undef + ret void +} +; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 4112 +; DEFAULTSIZE: ; ScratchSize: 4112 + +; ASSUME1024: .amdhsa_private_segment_fixed_size 1040 +; ASSUME1024: ; ScratchSize: 1040 + +define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reached_align64(i32 addrspace(1)* %out, i32 %arg.cond, i32 %in) { +; GCN-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align64: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_add_u32 flat_scratch_lo, s6, s9 +; GCN-NEXT: s_addc_u32 flat_scratch_hi, s7, 0 +; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x8 +; GCN-NEXT: s_add_u32 s0, s0, s9 +; GCN-NEXT: s_addc_u32 s1, s1, 0 +; GCN-NEXT: s_mov_b32 s33, 0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_cmp_lg_u32 s6, 0 +; GCN-NEXT: s_cbranch_scc1 BB1_2 +; GCN-NEXT: ; %bb.1: ; %bb.0 +; GCN-NEXT: s_add_i32 s6, s32, 0x1000 +; GCN-NEXT: s_andn2_b32 s6, s6, 63 +; GCN-NEXT: s_lshl_b32 s7, s7, 2 +; GCN-NEXT: s_mov_b32 s32, s6 +; GCN-NEXT: v_mov_b32_e32 v2, s6 +; GCN-NEXT: v_mov_b32_e32 v1, 0 +; GCN-NEXT: s_add_i32 s6, s6, s7 +; GCN-NEXT: v_mov_b32_e32 v3, 1 +; GCN-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v3, v2, s[0:3], 0 offen offset:4 +; GCN-NEXT: v_mov_b32_e32 v1, s6 +; GCN-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen +; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_add_u32_e32 v2, v1, v0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_mov_b32_e32 v1, s5 +; GCN-NEXT: global_store_dword v[0:1], v2, off +; 
GCN-NEXT: BB1_2: ; %bb.1 +; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: global_store_dword v[0:1], v0, off +; GCN-NEXT: s_endpgm +entry: + %cond = icmp eq i32 %arg.cond, 0 + br i1 %cond, label %bb.0, label %bb.1 + +bb.0: + %alloca = alloca [16 x i32], align 64, addrspace(5) + %gep0 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 0 + %gep1 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 1 + store i32 0, i32 addrspace(5)* %gep0 + store i32 1, i32 addrspace(5)* %gep1 + %gep2 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %in + %load = load i32, i32 addrspace(5)* %gep2 + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %add = add i32 %load, %tid + store i32 %add, i32 addrspace(1)* %out + br label %bb.1 + +bb.1: + store volatile i32 0, i32 addrspace(1)* undef + ret void +} + +; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 4160 +; DEFAULTSIZE: ; ScratchSize: 4160 + +; ASSUME1024: .amdhsa_private_segment_fixed_size 1088 +; ASSUME1024: ; ScratchSize: 1088 + + +define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i32 %arg.cond0, i32 %arg.cond1, i32 %in) { +; GCN-LABEL: func_non_entry_block_static_alloca_align4: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s7, s33 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: s_add_u32 s32, s32, 0x400 +; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GCN-NEXT: s_cbranch_execz BB2_3 +; GCN-NEXT: ; %bb.1: ; %bb.0 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; GCN-NEXT: s_and_b64 exec, exec, vcc +; GCN-NEXT: s_cbranch_execz BB2_3 +; GCN-NEXT: ; %bb.2: ; %bb.1 +; GCN-NEXT: s_add_i32 s6, s32, 0x1000 +; GCN-NEXT: v_mov_b32_e32 v2, 0 +; GCN-NEXT: v_mov_b32_e32 v3, s6 +; GCN-NEXT: v_mov_b32_e32 v6, 1 +; GCN-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v6, v3, s[0:3], 0 offen offset:4 +; GCN-NEXT: v_lshl_add_u32 v2, v4, 2, s6 
+; GCN-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; GCN-NEXT: v_and_b32_e32 v3, 0x3ff, v5 +; GCN-NEXT: s_mov_b32 s32, s6 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_add_u32_e32 v2, v2, v3 +; GCN-NEXT: global_store_dword v[0:1], v2, off +; GCN-NEXT: BB2_3: ; %bb.2 +; GCN-NEXT: s_or_b64 exec, exec, s[4:5] +; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: global_store_dword v[0:1], v0, off +; GCN-NEXT: s_sub_u32 s32, s32, 0x400 +; GCN-NEXT: s_mov_b32 s33, s7 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] + +entry: + %cond0 = icmp eq i32 %arg.cond0, 0 + br i1 %cond0, label %bb.0, label %bb.2 + +bb.0: + %alloca = alloca [16 x i32], align 4, addrspace(5) + %gep0 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 0 + %gep1 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 1 + %cond1 = icmp eq i32 %arg.cond1, 0 + br i1 %cond1, label %bb.1, label %bb.2 + +bb.1: + ; Use the alloca outside of the defining block. + store i32 0, i32 addrspace(5)* %gep0 + store i32 1, i32 addrspace(5)* %gep1 + %gep2 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %in + %load = load i32, i32 addrspace(5)* %gep2 + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %add = add i32 %load, %tid + store i32 %add, i32 addrspace(1)* %out + br label %bb.2 + +bb.2: + store volatile i32 0, i32 addrspace(1)* undef + ret void +} + +define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out, i32 %arg.cond, i32 %in) { +; GCN-LABEL: func_non_entry_block_static_alloca_align64: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_add_u32 s4, s32, 0xfc0 +; GCN-NEXT: s_mov_b32 s7, s33 +; GCN-NEXT: s_and_b32 s33, s4, 0xfffff000 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; GCN-NEXT: s_add_u32 s32, s32, 0x2000 +; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GCN-NEXT: s_cbranch_execz BB3_2 +; GCN-NEXT: ; %bb.1: ; %bb.0 +; GCN-NEXT: s_add_i32 s6, s32, 0x1000 +; GCN-NEXT: 
s_andn2_b32 s6, s6, 63 +; GCN-NEXT: v_mov_b32_e32 v2, 0 +; GCN-NEXT: v_mov_b32_e32 v5, s6 +; GCN-NEXT: v_mov_b32_e32 v6, 1 +; GCN-NEXT: buffer_store_dword v2, v5, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v6, v5, s[0:3], 0 offen offset:4 +; GCN-NEXT: v_lshl_add_u32 v2, v3, 2, s6 +; GCN-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; GCN-NEXT: v_and_b32_e32 v3, 0x3ff, v4 +; GCN-NEXT: s_mov_b32 s32, s6 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_add_u32_e32 v2, v2, v3 +; GCN-NEXT: global_store_dword v[0:1], v2, off +; GCN-NEXT: BB3_2: ; %bb.1 +; GCN-NEXT: s_or_b64 exec, exec, s[4:5] +; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: global_store_dword v[0:1], v0, off +; GCN-NEXT: s_sub_u32 s32, s32, 0x2000 +; GCN-NEXT: s_mov_b32 s33, s7 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] +entry: + %cond = icmp eq i32 %arg.cond, 0 + br i1 %cond, label %bb.0, label %bb.1 + +bb.0: + %alloca = alloca [16 x i32], align 64, addrspace(5) + %gep0 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 0 + %gep1 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 1 + store i32 0, i32 addrspace(5)* %gep0 + store i32 1, i32 addrspace(5)* %gep1 + %gep2 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %in + %load = load i32, i32 addrspace(5)* %gep2 + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %add = add i32 %load, %tid + store i32 %add, i32 addrspace(1)* %out + br label %bb.1 + +bb.1: + store volatile i32 0, i32 addrspace(1)* undef + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() #0 + +attributes #0 = { nounwind readnone speculatable } diff --git a/llvm/test/CodeGen/AMDGPU/omod-nsz-flag.mir b/llvm/test/CodeGen/AMDGPU/omod-nsz-flag.mir index 01d95ad4c70d1c..f8a140d732066a 100644 --- a/llvm/test/CodeGen/AMDGPU/omod-nsz-flag.mir +++ b/llvm/test/CodeGen/AMDGPU/omod-nsz-flag.mir @@ -4,8 +4,8 @@ # FIXME: Is it OK to fold omod for this? 
# GCN-LABEL: name: omod_inst_flag_nsz_src -# GCN: %0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec -# GCN-NEXT: %1:vgpr_32 = V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec +# GCN: %0:vgpr_32 = nsz nofpexcept V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec +# GCN-NEXT: %1:vgpr_32 = nofpexcept V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $mode, implicit $exec # GCN-NEXT: S_ENDPGM 0, implicit %1 name: omod_inst_flag_nsz_src tracksRegLiveness: true @@ -18,15 +18,15 @@ body: | bb.0: liveins: $vgpr0, $vgpr1 - %0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec - %1:vgpr_32 = V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec + %0:vgpr_32 = nsz nofpexcept V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec + %1:vgpr_32 = nofpexcept V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $mode, implicit $exec S_ENDPGM 0, implicit %1 ... --- # GCN-LABEL: name: omod_inst_flag_nsz_result -# GCN: %0:vgpr_32 = V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1, implicit $exec +# GCN: %0:vgpr_32 = nofpexcept V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1, implicit $mode, implicit $exec # GCN-NEXT: S_ENDPGM 0, implicit %0 name: omod_inst_flag_nsz_result @@ -40,15 +40,15 @@ body: | bb.0: liveins: $vgpr0, $vgpr1 - %0:vgpr_32 = V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec - %1:vgpr_32 = nsz V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec + %0:vgpr_32 = nofpexcept V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec + %1:vgpr_32 = nsz nofpexcept V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $mode, implicit $exec S_ENDPGM 0, implicit %1 ... 
--- # GCN-LABEL: name: omod_inst_flag_nsz_both -# GCN: %0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1, implicit $exec +# GCN: %0:vgpr_32 = nsz nofpexcept V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1, implicit $mode, implicit $exec # GCN-NEXT: S_ENDPGM 0, implicit %0 name: omod_inst_flag_nsz_both @@ -62,7 +62,7 @@ body: | bb.0: liveins: $vgpr0, $vgpr1 - %0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec - %1:vgpr_32 = nsz V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec + %0:vgpr_32 = nsz nofpexcept V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec + %1:vgpr_32 = nsz nofpexcept V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $mode, implicit $exec S_ENDPGM 0, implicit %1 ... diff --git a/llvm/test/CodeGen/AMDGPU/power-sched-no-instr-sunit.mir b/llvm/test/CodeGen/AMDGPU/power-sched-no-instr-sunit.mir index 66bd4c163c669a..837389d6aa7ae5 100644 --- a/llvm/test/CodeGen/AMDGPU/power-sched-no-instr-sunit.mir +++ b/llvm/test/CodeGen/AMDGPU/power-sched-no-instr-sunit.mir @@ -15,7 +15,7 @@ body: | $sgpr10_sgpr11 = S_MOV_B64 $sgpr2_sgpr3, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr0_sgpr1_sgpr2_sgpr3 $sgpr8_sgpr9 = S_MOV_B64 $sgpr0_sgpr1, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3 S_BARRIER - $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X1F32 undef $vgpr0, undef $vgpr0, 0, 0, 0, 2, implicit $exec + $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X1F32 undef $vgpr0, undef $vgpr0, 0, 0, 0, 2, implicit $mode, implicit $exec $vgpr0 = V_ACCVGPR_READ_B32 $agpr31, implicit $exec BUFFER_STORE_DWORD_OFFEN killed $vgpr0, undef $vgpr0, 
$sgpr8_sgpr9_sgpr10_sgpr11, $sgpr6, 0, 0, 0, 0, 0, 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/regcoal-followcopychain-bogus-subrange-comparison.mir b/llvm/test/CodeGen/AMDGPU/regcoal-followcopychain-bogus-subrange-comparison.mir index 90dd8d50cea4f6..17652d49dd3695 100644 --- a/llvm/test/CodeGen/AMDGPU/regcoal-followcopychain-bogus-subrange-comparison.mir +++ b/llvm/test/CodeGen/AMDGPU/regcoal-followcopychain-bogus-subrange-comparison.mir @@ -41,12 +41,12 @@ body: | bb.0: successors: %bb.1(0x80000000) - %27:vgpr_32 = nnan arcp contract reassoc V_MAD_F32 0, undef %28:vgpr_32, 0, 0, 0, 0, 0, 0, implicit $exec - %30:vgpr_32 = V_MIN_F32_e32 1065353216, killed %27, implicit $exec - %31:sreg_64_xexec = V_CMP_NEQ_F32_e64 0, 1065353216, 0, killed %30, 0, implicit $exec + %27:vgpr_32 = nnan arcp contract reassoc V_MAD_F32 0, undef %28:vgpr_32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode + %30:vgpr_32 = V_MIN_F32_e32 1065353216, killed %27, implicit $exec, implicit $mode + %31:sreg_64_xexec = V_CMP_NEQ_F32_e64 0, 1065353216, 0, killed %30, 0, implicit $exec, implicit $mode %32:vgpr_32 = V_MOV_B32_e32 2143289344, implicit $exec %34:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, killed %32, killed %31, implicit $exec - %35:sreg_64_xexec = V_CMP_LT_F32_e64 0, 0, 0, killed %34, 0, implicit $exec + %35:sreg_64_xexec = V_CMP_LT_F32_e64 0, 0, 0, killed %34, 0, implicit $exec, implicit $mode %23:sreg_64 = S_MOV_B64 0 %38:sreg_32_xm0 = S_MOV_B32 0 %107:sreg_64 = COPY killed %23 @@ -98,7 +98,7 @@ body: | %51:vgpr_32 = COPY killed %2.sub1 %53:vgpr_32 = COPY killed %51 - %53:vgpr_32 = V_MAC_F32_e32 target-flags(amdgpu-gotprel32-lo) 0, undef %100.sub1:vreg_128, %53, implicit $exec + %53:vgpr_32 = V_MAC_F32_e32 target-flags(amdgpu-gotprel32-lo) 0, undef %100.sub1:vreg_128, %53, implicit $exec, implicit $mode %88:vreg_128 = COPY killed %85 %88.sub1:vreg_128 = COPY killed %53 %110:vreg_128 = COPY killed %88 @@ -113,8 +113,8 @@ body: | %59:sreg_64 = V_CMP_LT_U32_e64 3, killed %95, 
implicit $exec %11:vreg_128 = COPY %91 %60:vgpr_32 = V_MOV_B32_e32 953267991, implicit $exec - %61:sreg_64_xexec = V_CMP_GT_F32_e64 0, %91.sub2, 0, %60, 0, implicit $exec - %62:sreg_64 = V_CMP_NGT_F32_e64 0, %91.sub2, 0, killed %60, 0, implicit $exec + %61:sreg_64_xexec = V_CMP_GT_F32_e64 0, %91.sub2, 0, %60, 0, implicit $exec, implicit $mode + %62:sreg_64 = V_CMP_NGT_F32_e64 0, %91.sub2, 0, killed %60, 0, implicit $exec, implicit $mode %13:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed %61, implicit $exec %63:sreg_64 = S_OR_B64 killed %59, killed %62, implicit-def dead $scc %14:sreg_64 = S_AND_B64 $exec, killed %63, implicit-def $scc @@ -142,7 +142,7 @@ body: | successors: %bb.9(0x80000000) %65:vgpr_32 = COPY killed %11.sub1 - %67:vgpr_32 = V_MAD_F32 0, target-flags(amdgpu-gotprel32-hi) 0, 0, killed %91.sub2, 0, killed %65, 0, 0, implicit $exec + %67:vgpr_32 = V_MAD_F32 0, target-flags(amdgpu-gotprel32-hi) 0, 0, killed %91.sub2, 0, killed %65, 0, 0, implicit $exec, implicit $mode undef %102.sub1:vreg_128 = COPY killed %67 %93:vreg_128 = COPY killed %102 %111:vreg_128 = COPY killed %93 @@ -150,7 +150,7 @@ body: | bb.9: $exec = S_OR_B64 $exec, killed %16, implicit-def $scc %92:vreg_128 = COPY killed %111 - %77:vgpr_32 = V_MUL_F32_e32 target-flags(amdgpu-gotprel32-lo) 0, killed %92.sub1, implicit $exec + %77:vgpr_32 = V_MUL_F32_e32 target-flags(amdgpu-gotprel32-lo) 0, killed %92.sub1, implicit $exec, implicit $mode undef %106.sub0:vreg_128 = COPY %77 %106.sub1:vreg_128 = COPY %77 %106.sub2:vreg_128 = COPY %77 diff --git a/llvm/test/CodeGen/AMDGPU/regcoal-followcopychain-different-subreg-diffs.mir b/llvm/test/CodeGen/AMDGPU/regcoal-followcopychain-different-subreg-diffs.mir index 2798d99999ced9..58f892b387217d 100644 --- a/llvm/test/CodeGen/AMDGPU/regcoal-followcopychain-different-subreg-diffs.mir +++ b/llvm/test/CodeGen/AMDGPU/regcoal-followcopychain-different-subreg-diffs.mir @@ -18,14 +18,14 @@ body: | %16:sgpr_32 = COPY killed $sgpr0 $m0 = S_MOV_B32 killed %16 - 
%19:vgpr_32 = V_INTERP_P2_F32 undef %19, undef %21:vgpr_32, 0, 1, implicit $m0, implicit $exec + %19:vgpr_32 = V_INTERP_P2_F32 undef %19, undef %21:vgpr_32, 0, 1, implicit $m0, implicit $exec, implicit $mode %24:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - %23:vgpr_32 = V_MAD_F32 0, killed %19, 0, 0, 0, 0, 0, 0, implicit $exec + %23:vgpr_32 = V_MAD_F32 0, killed %19, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode undef %75.sub0:vreg_128 = COPY %24 %75.sub2:vreg_128 = COPY %24 - %26:vgpr_32 = nnan arcp contract reassoc V_MUL_F32_e64 0, 0, 0, killed %23, 1, 0, implicit $exec - %0:vgpr_32 = V_MUL_F32_e32 0, killed %26, implicit $exec - %28:sreg_64 = V_CMP_NLT_F32_e64 0, 0, 0, killed %0, 0, implicit $exec + %26:vgpr_32 = nnan arcp contract reassoc V_MUL_F32_e64 0, 0, 0, killed %23, 1, 0, implicit $exec, implicit $mode + %0:vgpr_32 = V_MUL_F32_e32 0, killed %26, implicit $exec, implicit $mode + %28:sreg_64 = V_CMP_NLT_F32_e64 0, 0, 0, killed %0, 0, implicit $exec, implicit $mode %89:vreg_128 = IMPLICIT_DEF %91:sreg_64 = COPY $exec, implicit-def $exec %92:sreg_64 = S_AND_B64 %91, %28, implicit-def dead $scc @@ -70,14 +70,14 @@ body: | S_BRANCH %bb.6 bb.6: - %43:vgpr_32 = V_MAD_F32 0, %8.sub0, 0, target-flags(amdgpu-gotprel) 0, 0, 0, 0, 0, implicit $exec + %43:vgpr_32 = V_MAD_F32 0, %8.sub0, 0, target-flags(amdgpu-gotprel) 0, 0, 0, 0, 0, implicit $exec, implicit $mode %44:vgpr_32 = COPY killed %43 - %44:vgpr_32 = V_MAC_F32_e32 target-flags(amdgpu-gotprel32-hi) 0, killed %8.sub2, %44, implicit $exec - %45:vgpr_32 = V_ADD_F32_e32 0, killed %44, implicit $exec + %44:vgpr_32 = V_MAC_F32_e32 target-flags(amdgpu-gotprel32-hi) 0, killed %8.sub2, %44, implicit $exec, implicit $mode + %45:vgpr_32 = V_ADD_F32_e32 0, killed %44, implicit $exec, implicit $mode %47:vgpr_32 = V_MOV_B32_e32 1107296256, implicit $exec - %48:vgpr_32 = nnan arcp contract reassoc V_MAD_F32 0, killed %45, 0, killed %47, 0, 1056964608, 0, 0, implicit $exec - %49:vgpr_32 = V_FLOOR_F32_e32 killed %48, implicit 
$exec - %50:vgpr_32 = V_CVT_I32_F32_e32 killed %49, implicit $exec + %48:vgpr_32 = nnan arcp contract reassoc V_MAD_F32 0, killed %45, 0, killed %47, 0, 1056964608, 0, 0, implicit $exec, implicit $mode + %49:vgpr_32 = V_FLOOR_F32_e32 killed %48, implicit $exec, implicit $mode + %50:vgpr_32 = V_CVT_I32_F32_e32 killed %49, implicit $exec, implicit $mode %81:vgpr_32 = V_ADD_I32_e32 1, killed %50, implicit-def dead $vcc, implicit $exec dead %82:vgpr_32 = V_MIN_I32_e32 31, killed %81, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir b/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir index 9b1bb7f2fb7e5d..e4e33026da4b0d 100644 --- a/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir +++ b/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir @@ -188,7 +188,7 @@ body: | %45 = BUFFER_LOAD_DWORD_OFFEN killed %13, undef %15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) %46 = V_AND_B32_e32 1, killed %45, implicit $exec %21 = S_BUFFER_LOAD_DWORD_SGPR undef %22, undef %23, 0, 0 :: (dereferenceable invariant load 4) - %25 = V_CMP_GE_F32_e64 0, 0, 0, killed %21, 0, implicit $exec + %25 = nofpexcept V_CMP_GE_F32_e64 0, 0, 0, killed %21, 0, implicit $mode, implicit $exec %26 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed %25, implicit $exec %62 = IMPLICIT_DEF @@ -211,13 +211,13 @@ body: | S_BRANCH %bb.31 bb.30: - %33 = V_MAD_F32 1, killed %53.sub0, 0, undef %34, 0, 0, 0, 0, implicit $exec - %35 = V_MAC_F32_e32 killed %33, undef %36, undef %35, implicit $exec - %38 = V_MAX_F32_e32 0, killed %35, implicit $exec - %39 = V_LOG_F32_e32 killed %38, implicit $exec - %40 = V_MUL_F32_e32 killed %39, undef %41, implicit $exec - %42 = V_EXP_F32_e32 killed %40, implicit $exec - dead %43 = V_MUL_F32_e32 killed %42, undef %44, implicit $exec + %33 = nofpexcept V_MAD_F32 1, killed %53.sub0, 0, undef %34, 0, 0, 0, 0, implicit $mode, implicit $exec + %35 = nofpexcept V_MAC_F32_e32 killed %33, undef %36, undef %35, implicit $mode, 
implicit $exec + %38 = nofpexcept V_MAX_F32_e32 0, killed %35, implicit $mode, implicit $exec + %39 = nofpexcept V_LOG_F32_e32 killed %38, implicit $mode, implicit $exec + %40 = nofpexcept V_MUL_F32_e32 killed %39, undef %41, implicit $mode, implicit $exec + %42 = nofpexcept V_EXP_F32_e32 killed %40, implicit $mode, implicit $exec + dead %43 = nofpexcept V_MUL_F32_e32 killed %42, undef %44, implicit $mode, implicit $exec %63 = COPY killed %51 bb.31: diff --git a/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join.mir b/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join.mir index ad56ba08583ef8..6d1df163ec8240 100644 --- a/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join.mir +++ b/llvm/test/CodeGen/AMDGPU/regcoal-subrange-join.mir @@ -113,10 +113,10 @@ body: | bb.1: %30 = V_MOV_B32_e32 1036831949, implicit $exec - %31 = V_ADD_F32_e32 %30, %1.sub3, implicit $exec - %33 = V_ADD_F32_e32 %30, %1.sub2, implicit $exec - %35 = V_ADD_F32_e32 %30, %1.sub1, implicit $exec - %37 = V_ADD_F32_e32 killed %30, killed %1.sub0, implicit $exec + %31 = nofpexcept V_ADD_F32_e32 %30, %1.sub3, implicit $mode, implicit $exec + %33 = nofpexcept V_ADD_F32_e32 %30, %1.sub2, implicit $mode, implicit $exec + %35 = nofpexcept V_ADD_F32_e32 %30, %1.sub1, implicit $mode, implicit $exec + %37 = nofpexcept V_ADD_F32_e32 killed %30, killed %1.sub0, implicit $mode, implicit $exec undef %56.sub0 = COPY killed %37 %56.sub1 = COPY killed %35 %56.sub2 = COPY killed %33 @@ -141,10 +141,10 @@ body: | %7 = COPY killed %61 %6 = COPY killed %60 %8 = S_ADD_I32 killed %6, 1, implicit-def dead $scc - %44 = V_ADD_F32_e32 %43, %7.sub3, implicit $exec - %46 = V_ADD_F32_e32 %43, %7.sub2, implicit $exec - %48 = V_ADD_F32_e32 %43, %7.sub1, implicit $exec - %50 = V_ADD_F32_e32 %43, killed %7.sub0, implicit $exec + %44 = nofpexcept V_ADD_F32_e32 %43, %7.sub3, implicit $mode, implicit $exec + %46 = nofpexcept V_ADD_F32_e32 %43, %7.sub2, implicit $mode, implicit $exec + %48 = nofpexcept V_ADD_F32_e32 %43, %7.sub1, implicit $mode, 
implicit $exec + %50 = nofpexcept V_ADD_F32_e32 %43, killed %7.sub0, implicit $mode, implicit $exec undef %57.sub0 = COPY killed %50 %57.sub1 = COPY killed %48 %57.sub2 = COPY %46 diff --git a/llvm/test/CodeGen/AMDGPU/regcoalesce-prune.mir b/llvm/test/CodeGen/AMDGPU/regcoalesce-prune.mir index 96bc78cbbd5458..5664c7005b5ddb 100644 --- a/llvm/test/CodeGen/AMDGPU/regcoalesce-prune.mir +++ b/llvm/test/CodeGen/AMDGPU/regcoalesce-prune.mir @@ -23,9 +23,9 @@ body: | %6 : vreg_64 = COPY killed %4 bb.2: - %2 : vgpr_32 = V_CVT_F32_I32_e32 killed %5.sub1, implicit $exec + %2 : vgpr_32 = V_CVT_F32_I32_e32 killed %5.sub1, implicit $mode, implicit $exec bb.3: - %3 : vgpr_32 = V_CVT_F32_I32_e32 killed %6.sub1, implicit $exec + %3 : vgpr_32 = V_CVT_F32_I32_e32 killed %6.sub1, implicit $mode, implicit $exec S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/regcoalescer-assert-from-incorrect-subrange-extension.mir b/llvm/test/CodeGen/AMDGPU/regcoalescer-assert-from-incorrect-subrange-extension.mir index d437f202d20060..1733c5aca2a2db 100644 --- a/llvm/test/CodeGen/AMDGPU/regcoalescer-assert-from-incorrect-subrange-extension.mir +++ b/llvm/test/CodeGen/AMDGPU/regcoalescer-assert-from-incorrect-subrange-extension.mir @@ -88,7 +88,7 @@ body: | %70:vreg_128 = COPY killed %90 %33:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %34:sgpr_128, 16, 0, 0 :: (dereferenceable invariant load 16) - %2:vgpr_32 = V_ADD_F32_e64 0, target-flags(amdgpu-rel32-lo) 0, 0, killed %33.sub3, 0, 0, implicit $exec + %2:vgpr_32 = V_ADD_F32_e64 0, target-flags(amdgpu-rel32-lo) 0, 0, killed %33.sub3, 0, 0, implicit $exec, implicit $mode S_CBRANCH_SCC1 %bb.5, implicit undef $scc S_BRANCH %bb.4 diff --git a/llvm/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir b/llvm/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir index 9693f61a45ff04..7f45c4058221c0 100644 --- a/llvm/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir +++ 
b/llvm/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir @@ -11,68 +11,68 @@ tracksRegLiveness: true body: | bb.0: successors: %bb.1, %bb.2 - - %21:vgpr_32 = V_TRUNC_F32_e32 undef %22:vgpr_32, implicit $exec - %23:vgpr_32 = V_CVT_U32_F32_e32 killed %21, implicit $exec + + %21:vgpr_32 = nofpexcept V_TRUNC_F32_e32 undef %22:vgpr_32, implicit $mode, implicit $exec + %23:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 killed %21, implicit $mode, implicit $exec %108:vgpr_32 = V_LSHRREV_B32_e32 4, killed %23, implicit $exec undef %109.sub1:vreg_128 = COPY %108 %28:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %29:sgpr_128, 3044, 0, 0 :: (dereferenceable invariant load 4) S_CMP_EQ_U32 killed %28, 0, implicit-def $scc S_CBRANCH_SCC0 %bb.2, implicit killed $scc - + bb.1: %138:vreg_128 = COPY killed %109 S_BRANCH %bb.9 - + bb.2: successors: %bb.3, %bb.4 - + S_CBRANCH_SCC0 %bb.4, implicit undef $scc - + bb.3: %136:vreg_128 = COPY killed %109 S_BRANCH %bb.5 - + bb.4: %136:vreg_128 = COPY killed %109 - + bb.5: successors: %bb.6, %bb.8 - + %110:vreg_128 = COPY killed %136 dead %32:sreg_32_xm0 = S_MOV_B32 0 %111:vreg_128 = COPY %110 %111.sub3:vreg_128 = COPY undef %32 S_CBRANCH_SCC1 %bb.8, implicit undef $scc S_BRANCH %bb.6 - + bb.6: %36:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %37:sgpr_128, 2708, 0, 0 :: (dereferenceable invariant load 4) - %39:vgpr_32 = nnan arcp contract reassoc V_MAD_F32 0, killed %110.sub1, 0, target-flags(amdgpu-gotprel32-lo) 0, 0, 0, 0, 0, implicit $exec - %40:vgpr_32 = V_MAD_F32 0, %111.sub1, 0, target-flags(amdgpu-gotprel32-lo) 0, 0, 0, 0, 0, implicit $exec - %41:vgpr_32 = V_MUL_F32_e64 0, 0, 0, killed %40, 1, 0, implicit $exec - %43:vgpr_32 = V_MUL_F32_e32 0, %39, implicit $exec + %39:vgpr_32 = nnan arcp contract reassoc nofpexcept V_MAD_F32 0, killed %110.sub1, 0, target-flags(amdgpu-gotprel32-lo) 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %40:vgpr_32 = nofpexcept V_MAD_F32 0, %111.sub1, 0, 
target-flags(amdgpu-gotprel32-lo) 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %41:vgpr_32 = nofpexcept V_MUL_F32_e64 0, 0, 0, killed %40, 1, 0, implicit $mode, implicit $exec + %43:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %39, implicit $mode, implicit $exec %44:vgpr_32 = COPY killed %43 - %44:vgpr_32 = V_MAC_F32_e32 0, killed %41, %44, implicit $exec + %44:vgpr_32 = nofpexcept V_MAC_F32_e32 0, killed %41, %44, implicit $mode, implicit $exec %47:vgpr_32 = V_MOV_B32_e32 2143289344, implicit $exec %46:vgpr_32 = COPY killed %47 - %46:vgpr_32 = V_MAC_F32_e32 0, killed %39, %46, implicit $exec + %46:vgpr_32 = nofpexcept V_MAC_F32_e32 0, killed %39, %46, implicit $mode, implicit $exec undef %115.sub0:vreg_128 = COPY %46 %115.sub1:vreg_128 = COPY killed %46 %115.sub2:vreg_128 = COPY killed %44 %50:sreg_64_xexec = V_CMP_NE_U32_e64 0, killed %36, implicit $exec dead %118:vgpr_32 = V_MOV_B32_e32 0, implicit $exec %137:vreg_128 = IMPLICIT_DEF - + bb.7: successors: %bb.7, %bb.8 - + %119:vreg_128 = COPY killed %137 %121:vreg_128 = COPY killed %119 %121.sub3:vreg_128 = COPY undef %32 - %56:vgpr_32 = V_ADD_F32_e32 %115.sub2, %121.sub2, implicit $exec - %59:vgpr_32 = V_ADD_F32_e32 %115.sub1, %121.sub1, implicit $exec - %62:vgpr_32 = V_ADD_F32_e32 %115.sub0, killed %121.sub0, implicit $exec + %56:vgpr_32 = nofpexcept V_ADD_F32_e32 %115.sub2, %121.sub2, implicit $mode, implicit $exec + %59:vgpr_32 = nofpexcept V_ADD_F32_e32 %115.sub1, %121.sub1, implicit $mode, implicit $exec + %62:vgpr_32 = nofpexcept V_ADD_F32_e32 %115.sub0, killed %121.sub0, implicit $mode, implicit $exec undef %117.sub0:vreg_128 = COPY killed %62 %117.sub1:vreg_128 = COPY killed %59 %117.sub2:vreg_128 = COPY killed %56 @@ -81,118 +81,118 @@ body: | %137:vreg_128 = COPY killed %117 S_CBRANCH_VCCNZ %bb.7, implicit killed $vcc S_BRANCH %bb.8 - + bb.8: dead %66:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %67:sgpr_128, 2704, 0, 0 :: (dereferenceable invariant load 4) %138:vreg_128 = COPY killed %111 - + bb.9: 
%113:vreg_128 = COPY killed %138 S_CBRANCH_SCC1 %bb.18, implicit undef $scc S_BRANCH %bb.10 - + bb.10: S_CBRANCH_SCC1 %bb.12, implicit undef $scc S_BRANCH %bb.11 - + bb.11: - + bb.12: successors: %bb.13, %bb.18 - + S_CBRANCH_SCC1 %bb.18, implicit undef $scc S_BRANCH %bb.13 - + bb.13: successors: %bb.14, %bb.17 - + S_CBRANCH_SCC1 %bb.17, implicit undef $scc S_BRANCH %bb.14 - + bb.14: S_CBRANCH_SCC1 %bb.16, implicit undef $scc S_BRANCH %bb.15 - + bb.15: - + bb.16: - + bb.17: - + bb.18: S_CBRANCH_SCC1 %bb.26, implicit undef $scc S_BRANCH %bb.19 - + bb.19: S_CBRANCH_SCC1 %bb.26, implicit undef $scc S_BRANCH %bb.20 - + bb.20: S_CBRANCH_SCC1 %bb.25, implicit undef $scc S_BRANCH %bb.21 - + bb.21: successors: %bb.22, %bb.24 - + S_CBRANCH_SCC1 %bb.24, implicit undef $scc S_BRANCH %bb.22 - + bb.22: successors: %bb.23, %bb.24 - + S_CBRANCH_SCC1 %bb.24, implicit undef $scc S_BRANCH %bb.23 - + bb.23: - + bb.24: - + bb.25: - + bb.26: S_CBRANCH_SCC1 %bb.33, implicit undef $scc S_BRANCH %bb.27 - + bb.27: S_CBRANCH_SCC1 %bb.33, implicit undef $scc S_BRANCH %bb.28 - + bb.28: dead %77:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - %78:vgpr_32 = nnan arcp contract reassoc V_MAD_F32 0, killed %113.sub1, 0, target-flags(amdgpu-gotprel32-lo) 0, 0, 1065353216, 0, 0, implicit $exec + %78:vgpr_32 = nnan arcp contract reassoc nofpexcept V_MAD_F32 0, killed %113.sub1, 0, target-flags(amdgpu-gotprel32-lo) 0, 0, 1065353216, 0, 0, implicit $mode, implicit $exec dead %80:sreg_32_xm0 = S_MOV_B32 0 - dead %82:vgpr_32 = V_MUL_F32_e32 killed %78, %78, implicit $exec + dead %82:vgpr_32 = nofpexcept V_MUL_F32_e32 killed %78, %78, implicit $mode, implicit $exec dead %126:vgpr_32 = V_MOV_B32_e32 2143289344, implicit $exec dead %125:vreg_128 = IMPLICIT_DEF dead %91:sreg_32_xm0 = S_MOV_B32 2143289344 %96:sreg_64 = S_AND_B64 $exec, 0, implicit-def dead $scc %139:vreg_128 = IMPLICIT_DEF - + bb.29: successors: %bb.30, %bb.31 - + dead %127:vreg_128 = COPY killed %139 S_CBRANCH_SCC0 %bb.31, implicit undef $scc - + 
bb.30: S_BRANCH %bb.32 - + bb.31: successors: %bb.32, %bb.34 - + $vcc = COPY %96 S_CBRANCH_VCCNZ %bb.34, implicit killed $vcc S_BRANCH %bb.32 - + bb.32: dead %130:vreg_128 = IMPLICIT_DEF dead %128:vreg_128 = COPY undef %130 %139:vreg_128 = IMPLICIT_DEF S_BRANCH %bb.29 - + bb.33: S_ENDPGM 0 - + bb.34: S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir b/llvm/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir index d7892a0c975925..d03f60cc683552 100644 --- a/llvm/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir +++ b/llvm/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir @@ -2,7 +2,7 @@ --- # GCN-LABEL: name: mac_invalid_operands -# GCN: undef %18.sub0:vreg_128 = V_MAC_F32_e32 undef %3:vgpr_32, undef %9:vgpr_32, undef %18.sub0, implicit $exec +# GCN: undef %18.sub0:vreg_128 = nofpexcept V_MAC_F32_e32 undef %3:vgpr_32, undef %9:vgpr_32, undef %18.sub0, implicit $mode, implicit $exec name: mac_invalid_operands alignment: 1 @@ -38,14 +38,14 @@ body: | bb.0: successors: %bb.2, %bb.1 - %7 = V_CMP_NEQ_F32_e64 0, 0, 0, undef %3, 0, implicit $exec + %7 = nofpexcept V_CMP_NEQ_F32_e64 0, 0, 0, undef %3, 0, implicit $mode, implicit $exec $vcc = COPY killed %7 S_CBRANCH_VCCZ %bb.2, implicit killed $vcc bb.1: successors: %bb.3 - %4 = V_ADD_F32_e32 undef %6, undef %5, implicit $exec + %4 = nofpexcept V_ADD_F32_e32 undef %6, undef %5, implicit $mode, implicit $exec undef %12.sub0 = COPY killed %4 %17 = COPY killed %12 S_BRANCH %bb.3 @@ -53,7 +53,7 @@ body: | bb.2: successors: %bb.3 - %8 = V_MAC_F32_e32 undef %3, undef %9, undef %8, implicit $exec + %8 = nofpexcept V_MAC_F32_e32 undef %3, undef %9, undef %8, implicit $mode, implicit $exec undef %13.sub0 = COPY %8 %13.sub1 = COPY %8 %13.sub2 = COPY killed %8 @@ -77,13 +77,13 @@ body: | # GCN-LABEL: name: vreg_does_not_dominate -# GCN: undef %8.sub1:vreg_128 = V_MAC_F32_e32 undef %2:vgpr_32, undef %1:vgpr_32, undef %8.sub1, implicit $exec +# GCN: undef 
%8.sub1:vreg_128 = nofpexcept V_MAC_F32_e32 undef %2:vgpr_32, undef %1:vgpr_32, undef %8.sub1, implicit $mode, implicit $exec # GCN: undef %7.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec # GCN: undef %9.sub2:vreg_128 = COPY %7.sub0 -# GCN: undef %6.sub3:vreg_128 = V_ADD_F32_e32 undef %3:vgpr_32, undef %3:vgpr_32, implicit $exec -# GCN: undef %7.sub0:vreg_128 = V_ADD_F32_e64 0, 0, 0, 0, 0, 0, implicit $exec -# GCN: %8.sub1:vreg_128 = V_ADD_F32_e32 %8.sub1, %8.sub1, implicit $exec +# GCN: undef %6.sub3:vreg_128 = nofpexcept V_ADD_F32_e32 undef %3:vgpr_32, undef %3:vgpr_32, implicit $mode, implicit $exec +# GCN: undef %7.sub0:vreg_128 = nofpexcept V_ADD_F32_e64 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec +# GCN: %8.sub1:vreg_128 = nofpexcept V_ADD_F32_e32 %8.sub1, %8.sub1, implicit $mode, implicit $exec # GCN: BUFFER_STORE_DWORD_OFFEN %6.sub3, %0, # GCN: BUFFER_STORE_DWORD_OFFEN %9.sub2, %0, @@ -117,7 +117,7 @@ body: | %5 = COPY $sgpr30_sgpr31 %0 = COPY $vgpr0 - undef %6.sub1 = V_MAC_F32_e32 undef %2, undef %1, undef %6.sub1, implicit $exec + undef %6.sub1 = nofpexcept V_MAC_F32_e32 undef %2, undef %1, undef %6.sub1, implicit $mode, implicit $exec %6.sub0 = V_MOV_B32_e32 0, implicit $exec %6.sub2 = COPY %6.sub0 S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc @@ -126,9 +126,9 @@ body: | bb.1: successors: %bb.2 - %6.sub3 = V_ADD_F32_e32 undef %3, undef %3, implicit $exec - %6.sub0 = V_ADD_F32_e64 0, 0, 0, 0, 0, 0, implicit $exec - %6.sub1 = V_ADD_F32_e32 %6.sub1, %6.sub1, implicit $exec + %6.sub3 = nofpexcept V_ADD_F32_e32 undef %3, undef %3, implicit $mode, implicit $exec + %6.sub0 = nofpexcept V_ADD_F32_e64 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %6.sub1 = nofpexcept V_ADD_F32_e32 %6.sub1, %6.sub1, implicit $mode, implicit $exec %6.sub2 = COPY %6.sub0 bb.2: @@ -143,7 +143,7 @@ body: | # GCN-LABEL: name: inf_loop_tied_operand # GCN: bb.0: -# GCN-NEXT: undef %2.sub0:vreg_128 = V_MAC_F32_e32 1073741824, undef %0:vgpr_32, undef %2.sub0, implicit $exec +# 
GCN-NEXT: undef %2.sub0:vreg_128 = nofpexcept V_MAC_F32_e32 1073741824, undef %0:vgpr_32, undef %2.sub0, implicit $mode, implicit $exec # GCN-NEXT: dead undef %3.sub1:vreg_128 = COPY %2.sub0 name: inf_loop_tied_operand @@ -154,7 +154,7 @@ registers: - { id: 2, class: vreg_128, preferred-register: '' } body: | bb.0: - %1 = V_MAC_F32_e32 1073741824, undef %0, undef %1, implicit $exec + %1 = nofpexcept V_MAC_F32_e32 1073741824, undef %0, undef %1, implicit $mode, implicit $exec undef %2.sub0 = COPY %1 %2.sub1 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll index 1903f89789b162..1e69d4551359c6 100644 --- a/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll +++ b/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll @@ -390,7 +390,7 @@ define amdgpu_kernel void @all_local_size(i64 addrspace(1)* nocapture readnone % ; CHECK-LABEL: @partial_load_group_size_x( ; CHECK-NEXT: %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() ; CHECK-NEXT: %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4 -; CHECK-NEXT: %group.size.x.lo = load i8, i8 addrspace(4)* %gep.group.size.x, align 1 +; CHECK-NEXT: %group.size.x.lo = load i8, i8 addrspace(4)* %gep.group.size.x, align 4 ; CHECK-NEXT: store i8 %group.size.x.lo, i8 addrspace(1)* %out, align 1 define amdgpu_kernel void @partial_load_group_size_x(i8 addrspace(1)* %out) #0 !reqd_work_group_size !0 { %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() @@ -400,6 +400,19 @@ define amdgpu_kernel void @partial_load_group_size_x(i8 addrspace(1)* %out) #0 ! 
ret void } +; CHECK-LABEL: @partial_load_group_size_x_explicit_callsite_align( +; CHECK-NEXT: %dispatch.ptr = tail call align 2 i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() +; CHECK-NEXT: %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4 +; CHECK-NEXT: %group.size.x.lo = load i8, i8 addrspace(4)* %gep.group.size.x, align 2 +; CHECK-NEXT: store i8 %group.size.x.lo, i8 addrspace(1)* %out, align 1 +define amdgpu_kernel void @partial_load_group_size_x_explicit_callsite_align(i8 addrspace(1)* %out) #0 !reqd_work_group_size !0 { + %dispatch.ptr = tail call align 2 i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() + %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4 + %group.size.x.lo = load i8, i8 addrspace(4)* %gep.group.size.x, align 1 + store i8 %group.size.x.lo, i8 addrspace(1)* %out + ret void +} + ; TODO: Should be able to handle this ; CHECK-LABEL: @load_group_size_xy_i32( ; CHECK: %group.size.xy = load i32, diff --git a/llvm/test/CodeGen/AMDGPU/s_add_co_pseudo_lowering.mir b/llvm/test/CodeGen/AMDGPU/s_add_co_pseudo_lowering.mir new file mode 100644 index 00000000000000..40bdf8e643175e --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/s_add_co_pseudo_lowering.mir @@ -0,0 +1,50 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-fix-sgpr-copies %s -o - | FileCheck -check-prefix=GCN %s +--- +name: s_add_co_pseudo_test +tracksRegLiveness: true +body: | + + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr0, $sgpr1, $sgpr2 + ; GCN-LABEL: name: s_add_co_pseudo_test + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr0, $sgpr1, $sgpr2 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GCN: 
[[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GCN: [[COPY6:%[0-9]+]]:sgpr_32 = COPY [[COPY3]] + ; GCN: [[V_MUL_LO_U32_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32 [[COPY]], [[COPY4]], implicit $exec + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 killed [[V_MUL_LO_U32_]], [[COPY6]], 0, implicit $exec + ; GCN: [[S_MUL_HI_U32_:%[0-9]+]]:sreg_32 = S_MUL_HI_U32 [[COPY4]], [[COPY5]] + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -614296167 + ; GCN: [[V_MUL_LO_U32_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32 [[COPY]], [[COPY3]], implicit $exec + ; GCN: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_]] + ; GCN: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 killed [[V_MUL_LO_U32_1]], [[COPY7]], [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GCN: [[V_MUL_HI_U32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32 [[COPY4]], [[V_ADDC_U32_e64_]], implicit $exec + ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -181084736 + ; GCN: [[V_MUL_LO_U32_2:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32 [[V_MUL_HI_U32_]], [[S_MOV_B32_1]], implicit $exec + ; GCN: [[COPY8:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_1]] + ; GCN: [[V_ADDC_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY8]], killed [[V_MUL_LO_U32_2]], [[V_ADDC_U32_e64_1]], 0, implicit $exec + %0:vgpr_32 = COPY $vgpr0 + %6:sreg_32 = COPY %0 + %1:vgpr_32 = COPY $vgpr1 + %2:vgpr_32 = COPY $vgpr2 + %3:sreg_32 = COPY $sgpr0 + %4:sreg_32 = COPY $sgpr1 + %5:sreg_32 = COPY $sgpr2 + %20:vgpr_32 = COPY %3 + %7:sreg_32 = S_MUL_I32 %6, %4 + %9:vgpr_32, %10:sreg_64_xexec = V_ADD_I32_e64 killed %7, %20, 0, implicit $exec + %8:sreg_32 = S_MUL_HI_U32 %4, %5 + %11:sreg_32 = S_MOV_B32 -614296167 + %12:sreg_32 = S_MUL_I32 %6, %3 + %14:sreg_32, %13:sreg_64_xexec = S_ADD_CO_PSEUDO killed %12, killed %11, killed %10, implicit-def dead $scc + %15:sreg_32 = S_MUL_HI_U32 %4, %14 + %16:sreg_32 = S_MOV_B32 -181084736 + %17:sreg_32 = S_MUL_I32 %15, 
%16 + %19:sreg_32, %18:sreg_64_xexec = S_ADD_CO_PSEUDO killed %16, killed %17, killed %13, implicit-def dead $scc +... diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir index 88cb57ca0cdc73..fd435d4adbe645 100644 --- a/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir +++ b/llvm/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir @@ -33,13 +33,13 @@ body: | ; CHECK: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_]] - ; CHECK: undef %6.sub0:vreg_64 = V_ADD_F32_e32 [[DEF]].sub0, [[COPY1]].sub0, implicit $exec - ; CHECK: dead undef %6.sub1:vreg_64 = V_ADD_F32_e32 [[DEF]].sub1, [[COPY1]].sub0, implicit $exec + ; CHECK: undef %6.sub0:vreg_64 = V_ADD_F32_e32 [[DEF]].sub0, [[COPY1]].sub0, implicit $mode, implicit $exec + ; CHECK: dead undef %6.sub1:vreg_64 = V_ADD_F32_e32 [[DEF]].sub1, [[COPY1]].sub0, implicit $mode, implicit $exec ; CHECK: [[DEF6:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; CHECK: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY1]], 0, 0, 0, 0, implicit $exec - ; CHECK: undef %19.sub0:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD1]], [[GLOBAL_LOAD_DWORDX2_]].sub0, implicit $exec + ; CHECK: undef %19.sub0:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD1]], [[GLOBAL_LOAD_DWORDX2_]].sub0, implicit $mode, implicit $exec ; CHECK: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK: %19.sub1:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD]], implicit $exec + ; CHECK: %19.sub1:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD]], implicit $mode, implicit $exec ; CHECK: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK: %4.sub1:vreg_64 = V_ADD_U32_e32 [[COPY]], [[COPY]], implicit $exec ; CHECK: GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, 0, 0, implicit $exec @@ -55,11 +55,11 @@ body: | ; CHECK: 
GLOBAL_STORE_DWORD [[DEF7]], [[V_MOV_B32_e32_1]], 0, 0, 0, 0, implicit $exec ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: S_SETREG_IMM32_B32 0, 1 + ; CHECK: S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode ; CHECK: DBG_VALUE ; CHECK: DBG_VALUE ; CHECK: DBG_VALUE - ; CHECK: S_SETREG_IMM32_B32 0, 1 + ; CHECK: S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode ; CHECK: bb.2: ; CHECK: S_NOP 0, implicit [[COPY]] ; CHECK: S_NOP 0, implicit [[DEF8]] @@ -74,8 +74,8 @@ body: | undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0, %0, implicit $exec %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec %5:vreg_64 = COPY %2 - undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0, %5.sub0, implicit $exec - %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1, %5.sub0, implicit $exec + undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0, %5.sub0, implicit $mode, implicit $exec + %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1, %5.sub0, implicit $mode, implicit $exec %7:vgpr_32 = GLOBAL_LOAD_DWORD %5, 0, 0, 0, 0, implicit $exec %8:vreg_64 = IMPLICIT_DEF %9:vreg_64 = IMPLICIT_DEF @@ -88,8 +88,8 @@ body: | %16:vgpr_32 = IMPLICIT_DEF %17:vgpr_32 = V_MOV_B32_e32 0, implicit $exec %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7, %2.sub0, implicit $exec - %19.sub1:vreg_64 = V_ADD_F32_e32 %3, %3, implicit $exec + undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7, %2.sub0, implicit $mode, implicit $exec + %19.sub1:vreg_64 = V_ADD_F32_e32 %3, %3, implicit $mode, implicit $exec GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, 0, 0, implicit $exec %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9, 0, 0, 0, 0, implicit $exec %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10, 0, 0, 0, 0, implicit $exec @@ -101,11 +101,11 @@ body: | GLOBAL_STORE_DWORD %15, %18, 0, 0, 0, 0, implicit $exec bb.1: - S_SETREG_IMM32_B32 0, 1 + S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode DBG_VALUE DBG_VALUE DBG_VALUE - S_SETREG_IMM32_B32 0, 1 + S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode 
bb.2: S_NOP 0, implicit %0 diff --git a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir index 9d3144196eb17e..aac40b73a41e03 100644 --- a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir +++ b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir @@ -275,7 +275,7 @@ body: | %76:vreg_128 = GLOBAL_LOAD_DWORDX4 %72, 0, 0, 0, 0, implicit $exec %77:vgpr_32 = IMPLICIT_DEF %78:vgpr_32 = IMPLICIT_DEF - %79:vgpr_32 = V_MUL_F32_e32 0, %77, implicit $exec + %79:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %77, implicit $mode, implicit $exec %80:vgpr_32 = IMPLICIT_DEF %81:vgpr_32 = IMPLICIT_DEF %84:vgpr_32 = IMPLICIT_DEF @@ -288,9 +288,9 @@ body: | %87:vgpr_32 = IMPLICIT_DEF %88:vgpr_32 = IMPLICIT_DEF %90:vgpr_32 = IMPLICIT_DEF - %91:vgpr_32, dead %92:sreg_64 = V_DIV_SCALE_F32 %90, %90, 1065353216, implicit $exec - %95:vgpr_32 = V_FMA_F32 0, 0, 0, 0, 0, undef %93:vgpr_32, 0, 0, implicit $exec - %96:vgpr_32, %97:sreg_64 = V_DIV_SCALE_F32 1065353216, %90, 1065353216, implicit $exec + %91:vgpr_32, dead %92:sreg_64 = nofpexcept V_DIV_SCALE_F32 %90, %90, 1065353216, implicit $mode, implicit $exec + %95:vgpr_32 = nofpexcept V_FMA_F32 0, 0, 0, 0, 0, undef %93:vgpr_32, 0, 0, implicit $mode, implicit $exec + %96:vgpr_32, %97:sreg_64 = nofpexcept V_DIV_SCALE_F32 1065353216, %90, 1065353216, implicit $mode, implicit $exec %98:vgpr_32 = IMPLICIT_DEF %99:vgpr_32 = IMPLICIT_DEF %100:vgpr_32 = IMPLICIT_DEF @@ -299,18 +299,18 @@ body: | %103:vgpr_32 = IMPLICIT_DEF %104:vgpr_32 = IMPLICIT_DEF %105:vgpr_32 = IMPLICIT_DEF - %106:vgpr_32, dead %107:sreg_64 = V_DIV_SCALE_F32 %90, %90, %105, implicit $exec - %108:vgpr_32 = V_RCP_F32_e32 0, implicit $exec + %106:vgpr_32, dead %107:sreg_64 = nofpexcept V_DIV_SCALE_F32 %90, %90, %105, implicit $mode, implicit $exec + %108:vgpr_32 = nofpexcept V_RCP_F32_e32 0, implicit $mode, implicit $exec %109:vgpr_32 = IMPLICIT_DEF - %110:vgpr_32 = V_FMA_F32 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec - %111:vgpr_32, 
%112:sreg_64 = V_DIV_SCALE_F32 0, 0, 0, implicit $exec - %113:vgpr_32 = V_MUL_F32_e32 0, %110, implicit $exec + %110:vgpr_32 = nofpexcept V_FMA_F32 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %111:vgpr_32, %112:sreg_64 = nofpexcept V_DIV_SCALE_F32 0, 0, 0, implicit $mode, implicit $exec + %113:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %110, implicit $mode, implicit $exec %114:vgpr_32 = IMPLICIT_DEF %115:vgpr_32 = IMPLICIT_DEF %116:vgpr_32 = IMPLICIT_DEF $vcc = IMPLICIT_DEF - %117:vgpr_32 = V_DIV_FMAS_F32 0, %116, 0, %110, 0, %115, 0, 0, implicit killed $vcc, implicit $exec - %118:vgpr_32 = V_DIV_FIXUP_F32 0, %117, 0, %90, 0, %105, 0, 0, implicit $exec + %117:vgpr_32 = nofpexcept V_DIV_FMAS_F32 0, %116, 0, %110, 0, %115, 0, 0, implicit killed $vcc, implicit $mode, implicit $exec + %118:vgpr_32 = nofpexcept V_DIV_FIXUP_F32 0, %117, 0, %90, 0, %105, 0, 0, implicit $mode, implicit $exec %119:vgpr_32 = IMPLICIT_DEF %120:vgpr_32 = IMPLICIT_DEF %121:vgpr_32 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-gfx9.mir b/llvm/test/CodeGen/AMDGPU/sdwa-gfx9.mir index b4ac7cf8732c4e..192bce362c4f96 100644 --- a/llvm/test/CodeGen/AMDGPU/sdwa-gfx9.mir +++ b/llvm/test/CodeGen/AMDGPU/sdwa-gfx9.mir @@ -50,12 +50,12 @@ body: | # GCN-LABEL: {{^}}name: trunc_shr_f32 # CI: [[SHIFT:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, %{{[0-9]+}}, implicit $exec -# CI: %{{[0-9]+}}:vgpr_32 = V_TRUNC_F32_e64 0, killed [[SHIFT]], 1, 2, implicit-def $vcc, implicit $exec +# CI: %{{[0-9]+}}:vgpr_32 = V_TRUNC_F32_e64 0, killed [[SHIFT]], 1, 2, implicit $mode, implicit $exec, implicit-def $vcc # VI: [[SHIFT:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, %{{[0-9]+}}, implicit $exec -# VI: %{{[0-9]+}}:vgpr_32 = V_TRUNC_F32_e64 0, killed [[SHIFT]], 1, 2, implicit-def $vcc, implicit $exec +# VI: %{{[0-9]+}}:vgpr_32 = V_TRUNC_F32_e64 0, killed [[SHIFT]], 1, 2, implicit $mode, implicit $exec, implicit-def $vcc -#GFX9: %{{[0-9]+}}:vgpr_32 = V_TRUNC_F32_sdwa 0, %{{[0-9]+}}, 1, 2, 6, 0, 5, implicit $exec 
+# GFX9: %{{[0-9]+}}:vgpr_32 = V_TRUNC_F32_sdwa 0, %{{[0-9]+}}, 1, 2, 6, 0, 5, implicit $mode, implicit $exec --- name: trunc_shr_f32 @@ -82,7 +82,7 @@ body: | %0 = COPY $vgpr0_vgpr1 %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) %10 = V_LSHRREV_B32_e64 16, %3, implicit $exec - %11 = V_TRUNC_F32_e64 0, killed %10, 1, 2, implicit-def $vcc, implicit $exec + %11 = V_TRUNC_F32_e64 0, killed %10, 1, 2, implicit $mode, implicit $exec, implicit-def $vcc FLAT_STORE_DWORD %0, %11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) $sgpr30_sgpr31 = COPY %2 S_SETPC_B64_return $sgpr30_sgpr31 diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir index bd518924bb588a..688e039b16640d 100644 --- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir +++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir @@ -3,21 +3,21 @@ # GCN-LABEL: {{^}}name: vop1_instructions # GFX1010: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit $exec -# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $exec -# GFX1010: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit $exec -# GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit $exec -# GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $exec +# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $mode, implicit $exec +# GFX1010: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit $mode, implicit $exec +# GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit $mode, implicit $exec +# GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $mode, implicit $exec # GFX1010: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_sdwa 0, %{{[0-9]+}}, 0, 6, 0, 5, implicit 
$exec -# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $exec -# GFX1010: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit $exec -# GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit $exec -# GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $exec +# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $mode, implicit $exec +# GFX1010: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit $mode, implicit $exec +# GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit $mode, implicit $exec +# GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $mode, implicit $exec -# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit $exec -# GFX1010: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 1, 0, 5, 0, 5, implicit $exec -# GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 1, %{{[0-9]+}}, 0, 5, 0, 5, implicit $exec -# GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 1, 5, 0, 5, implicit $exec +# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit $mode, implicit $exec +# GFX1010: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 1, 0, 5, 0, 5, implicit $mode, implicit $exec +# GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 1, %{{[0-9]+}}, 0, 5, 0, 5, implicit $mode, implicit $exec +# GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 1, 5, 0, 5, implicit $mode, implicit $exec --- name: vop1_instructions @@ -88,43 +88,43 @@ body: | %10 = V_LSHRREV_B32_e64 16, %3, implicit $exec %11 = V_MOV_B32_e32 %10, implicit $exec %12 = V_LSHLREV_B32_e64 16, %11, implicit $exec - %14 = V_FRACT_F32_e32 123, implicit $exec + %14 = V_FRACT_F32_e32 123, implicit $mode, implicit $exec %15 = 
V_LSHLREV_B32_e64 16, %14, implicit $exec %16 = V_LSHRREV_B32_e64 16, %15, implicit $exec - %17 = V_SIN_F32_e32 %16, implicit $exec + %17 = V_SIN_F32_e32 %16, implicit $mode, implicit $exec %18 = V_LSHLREV_B32_e64 16, %17, implicit $exec %19 = V_LSHRREV_B32_e64 16, %18, implicit $exec - %20 = V_CVT_U32_F32_e32 %19, implicit $exec + %20 = V_CVT_U32_F32_e32 %19, implicit $mode, implicit $exec %21 = V_LSHLREV_B32_e64 16, %20, implicit $exec - %23 = V_CVT_F32_I32_e32 123, implicit $exec + %23 = V_CVT_F32_I32_e32 123, implicit $mode, implicit $exec %24 = V_LSHLREV_B32_e64 16, %23, implicit $exec %25 = V_LSHRREV_B32_e64 16, %3, implicit $exec %26 = V_MOV_B32_e64 %25, implicit $exec %26 = V_LSHLREV_B32_e64 16, %26, implicit $exec - %27 = V_FRACT_F32_e64 0, %6, 0, 0, implicit $exec + %27 = V_FRACT_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec %28 = V_LSHLREV_B32_e64 16, %27, implicit $exec %29 = V_LSHRREV_B32_e64 16, %28, implicit $exec - %30 = V_SIN_F32_e64 0, %29, 0, 0, implicit $exec + %30 = V_SIN_F32_e64 0, %29, 0, 0, implicit $mode, implicit $exec %31 = V_LSHLREV_B32_e64 16, %30, implicit $exec %32 = V_LSHRREV_B32_e64 16, %31, implicit $exec - %33 = V_CVT_U32_F32_e64 0, %32, 0, 0, implicit $exec + %33 = V_CVT_U32_F32_e64 0, %32, 0, 0, implicit $mode, implicit $exec %34 = V_LSHLREV_B32_e64 16, %33, implicit $exec - %35 = V_CVT_F32_I32_e64 %6, 0, 0, implicit $exec + %35 = V_CVT_F32_I32_e64 %6, 0, 0, implicit $mode, implicit $exec %36 = V_LSHLREV_B32_e64 16, %35, implicit $exec %37 = V_LSHRREV_B32_e64 16, %36, implicit $exec - %38 = V_FRACT_F32_e64 1, %37, 0, 0, implicit $exec + %38 = V_FRACT_F32_e64 1, %37, 0, 0, implicit $mode, implicit $exec %39 = V_LSHLREV_B32_e64 16, %38, implicit $exec %40 = V_LSHRREV_B32_e64 16, %39, implicit $exec - %41 = V_SIN_F32_e64 0, %40, 1, 0, implicit $exec + %41 = V_SIN_F32_e64 0, %40, 1, 0, implicit $mode, implicit $exec %42 = V_LSHLREV_B32_e64 16, %41, implicit $exec %43 = V_LSHRREV_B32_e64 16, %42, implicit $exec - %44 = 
V_CVT_U32_F32_e64 1, %43, 0, 0, implicit $exec + %44 = V_CVT_U32_F32_e64 1, %43, 0, 0, implicit $mode, implicit $exec %45 = V_LSHLREV_B32_e64 16, %44, implicit $exec %46 = V_LSHRREV_B32_e64 16, %45, implicit $exec - %47 = V_CVT_F32_I32_e64 %46, 0, 1, implicit $exec + %47 = V_CVT_F32_I32_e64 %46, 0, 1, implicit $mode, implicit $exec %48 = V_LSHLREV_B32_e64 16, %47, implicit $exec @@ -139,21 +139,21 @@ body: | # GCN-LABEL: {{^}}name: vop2_instructions # GFX1010: %{{[0-9]+}}:vgpr_32 = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 5, 0, 6, 5, implicit $exec -# GFX1010: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $exec -# GFX1010: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit $exec -# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F32_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit $exec -# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F16_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit $exec +# GFX1010: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $mode, implicit $exec +# GFX1010: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit $mode, implicit $exec +# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F32_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit $mode, implicit $exec +# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F16_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit $mode, implicit $exec # GFX1010: %{{[0-9]+}}:vgpr_32 = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 5, 0, 6, 5, implicit $exec -# GFX1010: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $exec -# GFX1010: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit $exec -# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F32_e64 0, 23, 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, implicit $exec -# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F16_e64 0, 
%{{[0-9]+}}, 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, implicit $exec +# GFX1010: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $mode, implicit $exec +# GFX1010: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit $mode, implicit $exec +# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F32_e64 0, 23, 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, implicit $mode, implicit $exec +# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F16_e64 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, implicit $mode, implicit $exec -# GFX1010: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $exec -# GFX1010: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit $exec -# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F32_e64 1, 23, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, 0, implicit $exec -# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F16_e64 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 2, implicit $exec +# GFX1010: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $mode, implicit $exec +# GFX1010: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit $mode, implicit $exec +# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F32_e64 1, 23, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, 0, implicit $mode, implicit $exec +# GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F16_e64 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 2, implicit $mode, implicit $exec name: vop2_instructions tracksRegLiveness: true @@ -237,18 +237,18 @@ body: | %13 = V_LSHLREV_B32_e64 16, %12, implicit $exec %14 = V_LSHRREV_B32_e64 16, %13, implicit $exec %15 = V_BFE_U32 %13, 8, 8, implicit $exec - %16 = V_ADD_F32_e32 %14, %15, implicit $exec + %16 = V_ADD_F32_e32 %14, %15, implicit $mode, implicit $exec %17 = V_LSHLREV_B32_e64 16, %16, implicit $exec %18 = V_LSHRREV_B32_e64 16, %17, implicit $exec %19 = V_BFE_U32 
%17, 8, 8, implicit $exec - %20 = V_SUB_F16_e32 %18, %19, implicit $exec + %20 = V_SUB_F16_e32 %18, %19, implicit $mode, implicit $exec %21 = V_LSHLREV_B32_e64 16, %20, implicit $exec %22 = V_BFE_U32 %20, 8, 8, implicit $exec - %23 = V_FMAC_F32_e32 %21, %22, %22, implicit $exec + %23 = V_FMAC_F32_e32 %21, %22, %22, implicit $mode, implicit $exec %24 = V_LSHLREV_B32_e64 16, %23, implicit $exec %25 = V_LSHRREV_B32_e64 16, %24, implicit $exec %26 = V_BFE_U32 %24, 8, 8, implicit $exec - %27 = V_FMAC_F16_e32 %25, %26, %26, implicit $exec + %27 = V_FMAC_F16_e32 %25, %26, %26, implicit $mode, implicit $exec %28 = V_LSHLREV_B32_e64 16, %27, implicit $exec %29 = V_LSHRREV_B32_e64 16, %28, implicit $exec @@ -256,32 +256,32 @@ body: | %31 = V_LSHLREV_B32_e64 16, %30, implicit $exec %32 = V_LSHRREV_B32_e64 16, %31, implicit $exec %33 = V_BFE_U32 %31, 8, 8, implicit $exec - %34 = V_ADD_F32_e64 0, %32, 0, %33, 0, 0, implicit $exec + %34 = V_ADD_F32_e64 0, %32, 0, %33, 0, 0, implicit $mode, implicit $exec %35 = V_LSHLREV_B32_e64 16, %34, implicit $exec %37 = V_BFE_U32 %35, 8, 8, implicit $exec - %38 = V_SUB_F16_e64 0, 23, 0, %37, 0, 0, implicit $exec + %38 = V_SUB_F16_e64 0, 23, 0, %37, 0, 0, implicit $mode, implicit $exec %39 = V_LSHLREV_B32_e64 16, %38, implicit $exec %40 = V_BFE_U32 %39, 8, 8, implicit $exec - %41 = V_FMAC_F32_e64 0, 23, 0, %40, 0, %40, 0, 0, implicit $exec + %41 = V_FMAC_F32_e64 0, 23, 0, %40, 0, %40, 0, 0, implicit $mode, implicit $exec %42 = V_LSHLREV_B32_e64 16, %41, implicit $exec %43 = V_LSHRREV_B32_e64 16, %42, implicit $exec %44 = V_BFE_U32 %42, 8, 8, implicit $exec - %45 = V_FMAC_F16_e64 0, %43, 0, %44, 0, %44, 0, 0, implicit $exec + %45 = V_FMAC_F16_e64 0, %43, 0, %44, 0, %44, 0, 0, implicit $mode, implicit $exec %46 = V_LSHLREV_B32_e64 16, %45, implicit $exec %47 = V_LSHRREV_B32_e64 16, %46, implicit $exec %48 = V_BFE_U32 %46, 8, 8, implicit $exec - %49 = V_ADD_F32_e64 0, %47, 1, %48, 0, 0, implicit $exec + %49 = V_ADD_F32_e64 0, %47, 1, %48, 0, 0, 
implicit $mode, implicit $exec %50 = V_LSHLREV_B32_e64 16, %49, implicit $exec %51 = V_BFE_U32 %50, 8, 8, implicit $exec - %52 = V_SUB_F16_e64 1, 23, 1, %51, 0, 0, implicit $exec + %52 = V_SUB_F16_e64 1, 23, 1, %51, 0, 0, implicit $mode, implicit $exec %53 = V_LSHLREV_B32_e64 16, %52, implicit $exec %54 = V_BFE_U32 %53, 8, 8, implicit $exec - %55 = V_FMAC_F32_e64 1, 23, 1, %54, 1, %54, 1, 0, implicit $exec + %55 = V_FMAC_F32_e64 1, 23, 1, %54, 1, %54, 1, 0, implicit $mode, implicit $exec %56 = V_LSHLREV_B32_e64 16, %55, implicit $exec %57 = V_LSHRREV_B32_e64 16, %56, implicit $exec %58 = V_BFE_U32 %56, 8, 8, implicit $exec - %59 = V_FMAC_F16_e64 1, %57, 1, %58, 1, %58, 0, 2, implicit $exec + %59 = V_FMAC_F16_e64 1, %57, 1, %58, 1, %58, 0, 2, implicit $mode, implicit $exec %60 = V_LSHLREV_B32_e64 16, %59, implicit $exec %100 = V_MOV_B32_e32 %60, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir index 8ba20b4a66ddbd..fa55e1be8a3f73 100644 --- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir +++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir @@ -4,28 +4,28 @@ # GFX89-LABEL: {{^}}name: vop1_instructions # GFX89: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit $exec -# GFX89: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $exec -# GFX89: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit $exec -# GFX89: %{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit $exec -# GFX89: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $exec +# GFX89: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $mode, implicit $exec +# GFX89: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit $mode, implicit $exec +# GFX89: %{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit $mode, implicit $exec 
+# GFX89: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $mode, implicit $exec # GFX89: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_sdwa 0, %{{[0-9]+}}, 0, 6, 0, 5, implicit $exec -# GFX89: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $exec -# GFX89: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit $exec -# GFX89: %{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit $exec -# GFX89: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $exec +# GFX89: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $mode, implicit $exec +# GFX89: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit $mode, implicit $exec +# GFX89: %{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit $mode, implicit $exec +# GFX89: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $mode, implicit $exec -# VI: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit $exec -# VI: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 1, 0, 5, 0, 5, implicit $exec -# VI: %{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 1, %{{[0-9]+}}, 0, 5, 0, 5, implicit $exec -# VI: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_e64 %{{[0-9]+}}, 0, 1, implicit $exec +# VI: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit $mode, implicit $exec +# VI: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 1, 0, 5, 0, 5, implicit $mode, implicit $exec +# VI: %{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 1, %{{[0-9]+}}, 0, 5, 0, 5, implicit $mode, implicit $exec +# VI: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_e64 %{{[0-9]+}}, 0, 1, implicit $mode, implicit $exec -# GFX9: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit $exec -# GFX9: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 1, 0, 5, 0, 5, implicit $exec -# GFX9: 
%{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 1, %{{[0-9]+}}, 0, 5, 0, 5, implicit $exec -# GFX9: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 1, 5, 0, 5, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit $mode, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 1, 0, 5, 0, 5, implicit $mode, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 1, %{{[0-9]+}}, 0, 5, 0, 5, implicit $mode, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 1, 5, 0, 5, implicit $mode, implicit $exec --- @@ -97,43 +97,43 @@ body: | %10 = V_LSHRREV_B32_e64 16, %3, implicit $exec %11 = V_MOV_B32_e32 %10, implicit $exec %12 = V_LSHLREV_B32_e64 16, %11, implicit $exec - %14 = V_FRACT_F32_e32 123, implicit $exec + %14 = V_FRACT_F32_e32 123, implicit $mode, implicit $exec %15 = V_LSHLREV_B32_e64 16, %14, implicit $exec %16 = V_LSHRREV_B32_e64 16, %15, implicit $exec - %17 = V_SIN_F32_e32 %16, implicit $exec + %17 = V_SIN_F32_e32 %16, implicit $mode, implicit $exec %18 = V_LSHLREV_B32_e64 16, %17, implicit $exec %19 = V_LSHRREV_B32_e64 16, %18, implicit $exec - %20 = V_CVT_U32_F32_e32 %19, implicit $exec + %20 = V_CVT_U32_F32_e32 %19, implicit $mode, implicit $exec %21 = V_LSHLREV_B32_e64 16, %20, implicit $exec - %23 = V_CVT_F32_I32_e32 123, implicit $exec + %23 = V_CVT_F32_I32_e32 123, implicit $mode, implicit $exec %24 = V_LSHLREV_B32_e64 16, %23, implicit $exec %25 = V_LSHRREV_B32_e64 16, %3, implicit $exec %26 = V_MOV_B32_e64 %25, implicit $exec %26 = V_LSHLREV_B32_e64 16, %26, implicit $exec - %27 = V_FRACT_F32_e64 0, %6, 0, 0, implicit $exec + %27 = V_FRACT_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec %28 = V_LSHLREV_B32_e64 16, %27, implicit $exec %29 = V_LSHRREV_B32_e64 16, %28, implicit $exec - %30 = V_SIN_F32_e64 0, %29, 0, 0, implicit $exec + %30 = V_SIN_F32_e64 0, %29, 0, 0, implicit $mode, implicit $exec %31 = V_LSHLREV_B32_e64 16, %30, 
implicit $exec %32 = V_LSHRREV_B32_e64 16, %31, implicit $exec - %33 = V_CVT_U32_F32_e64 0, %32, 0, 0, implicit $exec + %33 = V_CVT_U32_F32_e64 0, %32, 0, 0, implicit $mode, implicit $exec %34 = V_LSHLREV_B32_e64 16, %33, implicit $exec - %35 = V_CVT_F32_I32_e64 %6, 0, 0, implicit $exec + %35 = V_CVT_F32_I32_e64 %6, 0, 0, implicit $mode, implicit $exec %36 = V_LSHLREV_B32_e64 16, %35, implicit $exec %37 = V_LSHRREV_B32_e64 16, %36, implicit $exec - %38 = V_FRACT_F32_e64 1, %37, 0, 0, implicit $exec + %38 = V_FRACT_F32_e64 1, %37, 0, 0, implicit $mode, implicit $exec %39 = V_LSHLREV_B32_e64 16, %38, implicit $exec %40 = V_LSHRREV_B32_e64 16, %39, implicit $exec - %41 = V_SIN_F32_e64 0, %40, 1, 0, implicit $exec + %41 = V_SIN_F32_e64 0, %40, 1, 0, implicit $mode, implicit $exec %42 = V_LSHLREV_B32_e64 16, %41, implicit $exec %43 = V_LSHRREV_B32_e64 16, %42, implicit $exec - %44 = V_CVT_U32_F32_e64 1, %43, 0, 0, implicit $exec + %44 = V_CVT_U32_F32_e64 1, %43, 0, 0, implicit $mode, implicit $exec %45 = V_LSHLREV_B32_e64 16, %44, implicit $exec %46 = V_LSHRREV_B32_e64 16, %45, implicit $exec - %47 = V_CVT_F32_I32_e64 %46, 0, 1, implicit $exec + %47 = V_CVT_F32_I32_e64 %46, 0, 1, implicit $mode, implicit $exec %48 = V_LSHLREV_B32_e64 16, %47, implicit $exec @@ -149,40 +149,40 @@ body: | # VI: %{{[0-9]+}}:vgpr_32 = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 5, 0, 6, 5, implicit $exec -# VI: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $exec -# VI: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit $exec -# VI: %{{[0-9]+}}:vgpr_32 = V_MAC_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 6, 1, implicit $exec -# VI: %{{[0-9]+}}:vgpr_32 = V_MAC_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit $exec +# VI: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $mode, implicit $exec +# 
VI: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit $mode, implicit $exec +# VI: %{{[0-9]+}}:vgpr_32 = V_MAC_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 6, 1, implicit $mode, implicit $exec +# VI: %{{[0-9]+}}:vgpr_32 = V_MAC_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit $mode, implicit $exec # GFX9: %{{[0-9]+}}:vgpr_32 = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 5, 0, 6, 5, implicit $exec -# GFX9: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $exec -# GFX9: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit $exec -# GFX9: %{{[0-9]+}}:vgpr_32 = V_MAC_F32_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit $exec -# GFX9: %{{[0-9]+}}:vgpr_32 = V_MAC_F16_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $mode, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit $mode, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = V_MAC_F32_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit $mode, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = V_MAC_F16_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit $mode, implicit $exec # VI: %{{[0-9]+}}:vgpr_32 = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 5, 0, 6, 5, implicit $exec -# VI: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $exec -# VI: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit $exec -# VI: %{{[0-9]+}}:vgpr_32 = V_MAC_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 6, 1, implicit $exec -# VI: %{{[0-9]+}}:vgpr_32 = V_MAC_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit $exec +# VI: 
%{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $mode, implicit $exec +# VI: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit $mode, implicit $exec +# VI: %{{[0-9]+}}:vgpr_32 = V_MAC_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 6, 1, implicit $mode, implicit $exec +# VI: %{{[0-9]+}}:vgpr_32 = V_MAC_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit $mode, implicit $exec # GFX9: %{{[0-9]+}}:vgpr_32 = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 5, 0, 6, 5, implicit $exec -# GFX9: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $exec -# GFX9: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit $exec -# GFX9: %{{[0-9]+}}:vgpr_32 = V_MAC_F32_e64 0, 23, 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, implicit $exec -# GFX9: %{{[0-9]+}}:vgpr_32 = V_MAC_F16_e64 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $mode, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit $mode, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = V_MAC_F32_e64 0, 23, 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, implicit $mode, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = V_MAC_F16_e64 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, implicit $mode, implicit $exec -# VI: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $exec -# VI: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit $exec -# VI: %{{[0-9]+}}:vgpr_32 = V_MAC_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, %{{[0-9]+}}, 1, 0, 6, 0, 6, 1, implicit $exec -# VI: %{{[0-9]+}}:vgpr_32 = V_MAC_F16_e64 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, 
%{{[0-9]+}}, 0, 2, implicit $exec +# VI: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $mode, implicit $exec +# VI: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit $mode, implicit $exec +# VI: %{{[0-9]+}}:vgpr_32 = V_MAC_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, %{{[0-9]+}}, 1, 0, 6, 0, 6, 1, implicit $mode, implicit $exec +# VI: %{{[0-9]+}}:vgpr_32 = V_MAC_F16_e64 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 2, implicit $mode, implicit $exec -# GFX9: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $exec -# GFX9: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit $exec -# GFX9: %{{[0-9]+}}:vgpr_32 = V_MAC_F32_e64 1, 23, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, 0, implicit $exec -# GFX9: %{{[0-9]+}}:vgpr_32 = V_MAC_F16_e64 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 2, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $mode, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit $mode, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = V_MAC_F32_e64 1, 23, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, 0, implicit $mode, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = V_MAC_F16_e64 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 2, implicit $mode, implicit $exec name: vop2_instructions tracksRegLiveness: true @@ -266,18 +266,18 @@ body: | %13 = V_LSHLREV_B32_e64 16, %12, implicit $exec %14 = V_LSHRREV_B32_e64 16, %13, implicit $exec %15 = V_BFE_U32 %13, 8, 8, implicit $exec - %16 = V_ADD_F32_e32 %14, %15, implicit $exec + %16 = V_ADD_F32_e32 %14, %15, implicit $mode, implicit $exec %17 = V_LSHLREV_B32_e64 16, %16, implicit $exec %18 = V_LSHRREV_B32_e64 16, %17, implicit $exec %19 = V_BFE_U32 %17, 8, 8, implicit $exec - %20 = V_SUB_F16_e32 %18, %19, 
implicit $exec + %20 = V_SUB_F16_e32 %18, %19, implicit $mode, implicit $exec %21 = V_LSHLREV_B32_e64 16, %20, implicit $exec %22 = V_BFE_U32 %20, 8, 8, implicit $exec - %23 = V_MAC_F32_e32 %21, %22, %22, implicit $exec + %23 = V_MAC_F32_e32 %21, %22, %22, implicit $mode, implicit $exec %24 = V_LSHLREV_B32_e64 16, %23, implicit $exec %25 = V_LSHRREV_B32_e64 16, %24, implicit $exec %26 = V_BFE_U32 %24, 8, 8, implicit $exec - %27 = V_MAC_F16_e32 %25, %26, %26, implicit $exec + %27 = V_MAC_F16_e32 %25, %26, %26, implicit $mode, implicit $exec %28 = V_LSHLREV_B32_e64 16, %27, implicit $exec %29 = V_LSHRREV_B32_e64 16, %28, implicit $exec @@ -285,32 +285,32 @@ body: | %31 = V_LSHLREV_B32_e64 16, %30, implicit $exec %32 = V_LSHRREV_B32_e64 16, %31, implicit $exec %33 = V_BFE_U32 %31, 8, 8, implicit $exec - %34 = V_ADD_F32_e64 0, %32, 0, %33, 0, 0, implicit $exec + %34 = V_ADD_F32_e64 0, %32, 0, %33, 0, 0, implicit $mode, implicit $exec %35 = V_LSHLREV_B32_e64 16, %34, implicit $exec %37 = V_BFE_U32 %35, 8, 8, implicit $exec - %38 = V_SUB_F16_e64 0, 23, 0, %37, 0, 0, implicit $exec + %38 = V_SUB_F16_e64 0, 23, 0, %37, 0, 0, implicit $mode, implicit $exec %39 = V_LSHLREV_B32_e64 16, %38, implicit $exec %40 = V_BFE_U32 %39, 8, 8, implicit $exec - %41 = V_MAC_F32_e64 0, 23, 0, %40, 0, %40, 0, 0, implicit $exec + %41 = V_MAC_F32_e64 0, 23, 0, %40, 0, %40, 0, 0, implicit $mode, implicit $exec %42 = V_LSHLREV_B32_e64 16, %41, implicit $exec %43 = V_LSHRREV_B32_e64 16, %42, implicit $exec %44 = V_BFE_U32 %42, 8, 8, implicit $exec - %45 = V_MAC_F16_e64 0, %43, 0, %44, 0, %44, 0, 0, implicit $exec + %45 = V_MAC_F16_e64 0, %43, 0, %44, 0, %44, 0, 0, implicit $mode, implicit $exec %46 = V_LSHLREV_B32_e64 16, %45, implicit $exec %47 = V_LSHRREV_B32_e64 16, %46, implicit $exec %48 = V_BFE_U32 %46, 8, 8, implicit $exec - %49 = V_ADD_F32_e64 0, %47, 1, %48, 0, 0, implicit $exec + %49 = V_ADD_F32_e64 0, %47, 1, %48, 0, 0, implicit $mode, implicit $exec %50 = V_LSHLREV_B32_e64 16, %49, 
implicit $exec %51 = V_BFE_U32 %50, 8, 8, implicit $exec - %52 = V_SUB_F16_e64 1, 23, 1, %51, 0, 0, implicit $exec + %52 = V_SUB_F16_e64 1, 23, 1, %51, 0, 0, implicit $mode, implicit $exec %53 = V_LSHLREV_B32_e64 16, %52, implicit $exec %54 = V_BFE_U32 %53, 8, 8, implicit $exec - %55 = V_MAC_F32_e64 1, 23, 1, %54, 1, %54, 1, 0, implicit $exec + %55 = V_MAC_F32_e64 1, 23, 1, %54, 1, %54, 1, 0, implicit $mode, implicit $exec %56 = V_LSHLREV_B32_e64 16, %55, implicit $exec %57 = V_LSHRREV_B32_e64 16, %56, implicit $exec %58 = V_BFE_U32 %56, 8, 8, implicit $exec - %59 = V_MAC_F16_e64 1, %57, 1, %58, 1, %58, 0, 2, implicit $exec + %59 = V_MAC_F16_e64 1, %57, 1, %58, 1, %58, 0, 2, implicit $mode, implicit $exec %60 = V_LSHLREV_B32_e64 16, %59, implicit $exec %100 = V_MOV_B32_e32 %60, implicit $exec @@ -325,40 +325,40 @@ body: | # GCN-LABEL: {{^}}name: vopc_instructions # GFX89: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_e32 123, implicit $exec -# GFX89: $vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit $exec -# GFX89: $vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit-def $exec, implicit $exec +# GFX89: $vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit $mode, implicit $exec +# GFX89: $vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit-def $exec, implicit $mode, implicit $exec # GFX89: $vcc = V_CMP_LT_I32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit $exec # GFX89: $vcc = V_CMPX_EQ_I32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit-def $exec, implicit $exec -# VI: $vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit $exec -# VI: %{{[0-9]+}}:sreg_64 = V_CMPX_GT_F32_e64 0, 23, 0, killed %{{[0-9]+}}, 0, implicit-def $exec, implicit $exec +# VI: $vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, 
implicit-def $vcc, implicit $mode, implicit $exec +# VI: %{{[0-9]+}}:sreg_64 = V_CMPX_GT_F32_e64 0, 23, 0, killed %{{[0-9]+}}, 0, implicit-def $exec, implicit $mode, implicit $exec # VI: $vcc = V_CMP_LT_I32_sdwa 0, %{{[0-9]+}}, 0, %3, 0, 6, 4, implicit-def $vcc, implicit $exec # VI: %{{[0-9]+}}:sreg_64 = V_CMPX_EQ_I32_e64 23, killed %{{[0-9]+}}, implicit-def $exec, implicit $exec -# GFX9: $vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit $exec +# GFX9: $vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit $mode, implicit $exec # GFX9: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_e32 23, implicit $exec -# GFX9: %{{[0-9]+}}:sreg_64 = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit-def $exec, implicit $exec +# GFX9: %{{[0-9]+}}:sreg_64 = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit-def $exec, implicit $mode, implicit $exec # GFX9: $vcc = V_CMP_LT_I32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit $exec # GFX9: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_e32 23, implicit $exec # GFX9: %{{[0-9]+}}:sreg_64 = V_CMPX_EQ_I32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit-def $exec, implicit $exec -# VI: $vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 1, 6, 4, implicit-def $vcc, implicit $exec -# VI: $vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit-def $exec, implicit $exec -# VI: $vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 1, 6, 4, implicit-def $vcc, implicit $exec -# VI: $vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit-def $exec, implicit $exec -# VI: $vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit-def $exec, implicit $exec -# VI: $vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, 
implicit-def $exec, implicit $exec -# VI: $vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, 6, 4, implicit-def $vcc, implicit-def $exec, implicit $exec +# VI: $vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 1, 6, 4, implicit-def $vcc, implicit $mode, implicit $exec +# VI: $vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit-def $exec, implicit $mode, implicit $exec +# VI: $vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 1, 6, 4, implicit-def $vcc, implicit $mode, implicit $exec +# VI: $vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit-def $exec, implicit $mode, implicit $exec +# VI: $vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit-def $exec, implicit $mode, implicit $exec +# VI: $vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit-def $exec, implicit $mode, implicit $exec +# VI: $vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, 6, 4, implicit-def $vcc, implicit-def $exec, implicit $mode, implicit $exec -# GFX9: $vcc = V_CMP_EQ_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 1, implicit $exec -# GFX9: $vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit-def $exec, implicit $exec -# GFX9: $vcc = V_CMP_EQ_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 1, implicit $exec -# GFX9: $vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit-def $exec, implicit $exec -# GFX9: $vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit-def $exec, implicit $exec -# GFX9: $vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit-def $exec, implicit $exec -# GFX9: $vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %{{[0-9]+}}, 1, implicit-def $exec, implicit $exec +# GFX9: $vcc = V_CMP_EQ_F32_e64 0, %{{[0-9]+}}, 0, killed 
%{{[0-9]+}}, 1, implicit $mode, implicit $exec +# GFX9: $vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit-def $exec, implicit $mode, implicit $exec +# GFX9: $vcc = V_CMP_EQ_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 1, implicit $mode, implicit $exec +# GFX9: $vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit-def $exec, implicit $mode, implicit $exec +# GFX9: $vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit-def $exec, implicit $mode, implicit $exec +# GFX9: $vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def $vcc, implicit-def $exec, implicit $mode, implicit $exec +# GFX9: $vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %{{[0-9]+}}, 1, implicit-def $exec, implicit $mode, implicit $exec name: vopc_instructions @@ -406,37 +406,37 @@ body: | %6 = S_MOV_B32 65535 %10 = V_AND_B32_e64 %5, %3, implicit $exec - V_CMP_EQ_F32_e32 123, killed %10, implicit-def $vcc, implicit $exec + V_CMP_EQ_F32_e32 123, killed %10, implicit-def $vcc, implicit $mode, implicit $exec %11 = V_AND_B32_e64 %5, %3, implicit $exec - V_CMPX_GT_F32_e32 123, killed %11, implicit-def $vcc, implicit-def $exec, implicit $exec + V_CMPX_GT_F32_e32 123, killed %11, implicit-def $vcc, implicit $mode, implicit-def $exec, implicit $exec %12 = V_AND_B32_e64 %5, %3, implicit $exec V_CMP_LT_I32_e32 123, killed %12, implicit-def $vcc, implicit $exec %13 = V_AND_B32_e64 %5, %3, implicit $exec V_CMPX_EQ_I32_e32 123, killed %13, implicit-def $vcc, implicit-def $exec, implicit $exec %14 = V_AND_B32_e64 %5, %3, implicit $exec - $vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %14, 0, implicit $exec + $vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %14, 0, implicit $mode, implicit $exec %15 = V_AND_B32_e64 %5, %3, implicit $exec - %18 = V_CMPX_GT_F32_e64 0, 23, 0, killed %15, 0, implicit-def $exec, implicit $exec + %18 = V_CMPX_GT_F32_e64 0, 23, 0, killed %15, 0, implicit-def 
$exec, implicit $mode, implicit $exec %16 = V_AND_B32_e64 %5, %3, implicit $exec $vcc = V_CMP_LT_I32_e64 %6, killed %16, implicit $exec %17 = V_AND_B32_e64 %5, %3, implicit $exec %19 = V_CMPX_EQ_I32_e64 23, killed %17, implicit-def $exec, implicit $exec %20 = V_AND_B32_e64 %5, %3, implicit $exec - $vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %20, 1, implicit $exec + $vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %20, 1, implicit $mode, implicit $exec %21 = V_AND_B32_e64 %5, %3, implicit $exec - $vcc = V_CMPX_GT_F32_e64 0, 23, 0, killed %21, 0, implicit-def $exec, implicit $exec + $vcc = V_CMPX_GT_F32_e64 0, 23, 0, killed %21, 0, implicit-def $exec, implicit $mode, implicit $exec %23 = V_AND_B32_e64 %5, %3, implicit $exec - $vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %23, 1, implicit $exec + $vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %23, 1, implicit $mode, implicit $exec %24 = V_AND_B32_e64 %5, %3, implicit $exec - $vcc = V_CMPX_GT_F32_e64 1, 23, 0, killed %24, 0, implicit-def $exec, implicit $exec + $vcc = V_CMPX_GT_F32_e64 1, 23, 0, killed %24, 0, implicit-def $exec, implicit $mode, implicit $exec %25 = V_AND_B32_e64 %5, %3, implicit $exec - $vcc = V_CMPX_GT_F32_e64 0, 23, 1, killed %25, 0, implicit-def $exec, implicit $exec + $vcc = V_CMPX_GT_F32_e64 0, 23, 1, killed %25, 0, implicit-def $exec, implicit $mode, implicit $exec %26 = V_AND_B32_e64 %5, %3, implicit $exec - $vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %26, 0, implicit-def $exec, implicit $exec + $vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %26, 0, implicit-def $exec, implicit $mode, implicit $exec %27 = V_AND_B32_e64 %5, %3, implicit $exec - $vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %27, 1, implicit-def $exec, implicit $exec + $vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %27, 1, implicit-def $exec, implicit $mode, implicit $exec %100 = V_MOV_B32_e32 $vcc_lo, implicit $exec @@ -447,7 +447,7 @@ body: | ... 
# GCN-LABEL: name: preserve_flags -# GCN: = nnan nofpexcept V_ADD_F32_sdwa 0, %4, 0, %4, 0, 0, 6, 0, 5, 1, implicit $exec +# GCN: = nnan nofpexcept V_ADD_F32_sdwa 0, %4, 0, %4, 0, 0, 6, 0, 5, 1, implicit $mode, implicit $exec --- name: preserve_flags diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir b/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir index e91268364a6530..33802ad21fdd46 100644 --- a/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir +++ b/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir @@ -14,7 +14,7 @@ --- name: add_f16_u32_preserve tracksRegLiveness: true -registers: +registers: - { id: 0, class: vreg_64 } - { id: 1, class: vreg_64 } - { id: 2, class: sreg_64 } @@ -32,7 +32,7 @@ registers: body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31 - + %2 = COPY $sgpr30_sgpr31 %1 = COPY $vgpr2_vgpr3 %0 = COPY $vgpr0_vgpr1 @@ -44,9 +44,9 @@ body: | %7 = V_BFE_U32 %3, 8, 8, implicit $exec %8 = V_LSHRREV_B32_e32 24, %4, implicit $exec - %9 = V_ADD_F16_e64 0, %5, 0, %6, 0, 0, implicit $exec + %9 = V_ADD_F16_e64 0, %5, 0, %6, 0, 0, implicit $mode, implicit $exec %10 = V_LSHLREV_B16_e64 8, %9, implicit $exec - %11 = V_MUL_F32_e64 0, %7, 0, %8, 0, 0, implicit $exec + %11 = V_MUL_F32_e64 0, %7, 0, %8, 0, 0, implicit $mode, implicit $exec %12 = V_LSHLREV_B32_e64 16, %11, implicit $exec %13 = V_OR_B32_e64 %10, %12, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/shrink-instructions-flags.mir b/llvm/test/CodeGen/AMDGPU/shrink-instructions-flags.mir index b8c36bc77148f5..6a4e942e07a96f 100644 --- a/llvm/test/CodeGen/AMDGPU/shrink-instructions-flags.mir +++ b/llvm/test/CodeGen/AMDGPU/shrink-instructions-flags.mir @@ -14,7 +14,7 @@ body: | ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: %2:vgpr_32 = nnan nofpexcept V_ADD_F32_e32 [[COPY]], [[COPY1]], implicit $exec + ; CHECK: %2:vgpr_32 = nnan nofpexcept V_ADD_F32_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec ; 
CHECK: S_NOP 0 %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/smem-war-hazard.mir b/llvm/test/CodeGen/AMDGPU/smem-war-hazard.mir index 531cca11e47bf9..a3747ac6ac42b8 100644 --- a/llvm/test/CodeGen/AMDGPU/smem-war-hazard.mir +++ b/llvm/test/CodeGen/AMDGPU/smem-war-hazard.mir @@ -10,7 +10,7 @@ body: | bb.0: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 - $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec S_ENDPGM 0 ... @@ -25,7 +25,7 @@ body: | liveins: $sgpr0, $sgpr1, $sgpr4, $sgpr5, $vgpr0, $vgpr1 $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 $sgpr3 = S_ADD_U32 $sgpr4, $sgpr5, implicit-def $scc - $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec S_ENDPGM 0 ... @@ -42,7 +42,7 @@ body: | $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 S_WAITCNT 0 $sgpr3 = S_ADD_U32 $sgpr2, $sgpr4, implicit-def $scc - $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec S_ENDPGM 0 ... @@ -59,7 +59,7 @@ body: | $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 S_WAITCNT 0 $sgpr3 = S_ADD_U32 $sgpr5, $sgpr4, implicit-def $scc - $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec S_ENDPGM 0 ... @@ -75,7 +75,7 @@ body: | liveins: $sgpr0, $sgpr1, $sgpr6, $sgpr7, $vgpr0, $vgpr1 $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 $sgpr5 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0, 0 - $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec S_ENDPGM 0 ... 
@@ -90,7 +90,7 @@ body: | liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 S_WAITCNT 0 - $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec S_ENDPGM 0 ... @@ -106,7 +106,7 @@ body: | liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 S_WAITCNT 3952 - $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec S_ENDPGM 0 ... @@ -122,7 +122,7 @@ body: | liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 S_WAITCNT 53007 - $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec S_ENDPGM 0 ... @@ -137,7 +137,7 @@ body: | liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 S_WAITCNT 49279 - $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec S_ENDPGM 0 ... @@ -152,7 +152,7 @@ body: | liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 S_WAITCNT_LGKMCNT $sgpr_null, 0 - $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec S_ENDPGM 0 ... @@ -168,7 +168,7 @@ body: | liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 S_WAITCNT_LGKMCNT $sgpr_null, 1 - $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec S_ENDPGM 0 ... 
@@ -187,7 +187,7 @@ body: | bb.1: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 - $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec S_ENDPGM 0 ... @@ -212,12 +212,12 @@ body: | bb.1: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr4, $sgpr5, $vgpr0, $vgpr1 - $sgpr4_sgpr5 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + $sgpr4_sgpr5 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec S_ENDPGM 0 bb.2: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr4, $sgpr5, $vgpr0, $vgpr1 - $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec S_ENDPGM 0 ... @@ -244,17 +244,17 @@ body: | bb.1: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr4, $sgpr5, $vgpr0, $vgpr1 - $sgpr4_sgpr5 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + $sgpr4_sgpr5 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec S_ENDPGM 0 bb.2: successors: %bb.3 liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr4, $sgpr5, $vgpr0, $vgpr1 - $sgpr4_sgpr5 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + $sgpr4_sgpr5 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec bb.3: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr4, $sgpr5, $vgpr0, $vgpr1 - $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec S_ENDPGM 0 ... 
@@ -268,7 +268,7 @@ body: | bb.0: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 successors: %bb.1 - $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $exec + $sgpr0_sgpr1 = V_CMP_EQ_F32_e64 0, $vgpr0, 0, $vgpr1, 1, implicit $mode, implicit $exec bb.1: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 @@ -286,7 +286,7 @@ body: | bb.0: liveins: $vcc, $vgpr0 $sgpr0 = S_LOAD_DWORD_IMM $vcc, 0, 0, 0 - V_CMP_EQ_F32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $exec + V_CMP_EQ_F32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $mode, implicit $exec S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir b/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir index fd0debda403c2b..2be645954aed69 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir @@ -7,10 +7,10 @@ # CHECK-LABEL: name: expecting_non_empty_interval -# CHECK: undef %7.sub1:vreg_64 = V_MAC_F32_e32 0, undef %1:vgpr_32, undef %7.sub1, implicit $exec +# CHECK: undef %7.sub1:vreg_64 = V_MAC_F32_e32 0, undef %1:vgpr_32, undef %7.sub1, implicit $mode, implicit $exec # CHECK-NEXT: SI_SPILL_V64_SAVE %7, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5) # CHECK-NEXT: undef %5.sub1:vreg_64 = V_MOV_B32_e32 1786773504, implicit $exec -# CHECK-NEXT: dead %3:vgpr_32 = V_MUL_F32_e32 0, %5.sub1, implicit $exec +# CHECK-NEXT: dead %3:vgpr_32 = V_MUL_F32_e32 0, %5.sub1, implicit $mode, implicit $exec # CHECK: S_NOP 0, implicit %6.sub1 # CHECK-NEXT: %8:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) @@ -26,9 +26,9 @@ body: | bb.0: successors: %bb.1 - undef %0.sub1:vreg_64 = V_MAC_F32_e32 0, undef %1:vgpr_32, undef %0.sub1, implicit $exec + undef %0.sub1:vreg_64 = V_MAC_F32_e32 0, undef %1:vgpr_32, undef %0.sub1, implicit $mode, implicit $exec undef 
%2.sub1:vreg_64 = V_MOV_B32_e32 1786773504, implicit $exec - dead %3:vgpr_32 = V_MUL_F32_e32 0, %2.sub1, implicit $exec + dead %3:vgpr_32 = V_MUL_F32_e32 0, %2.sub1, implicit $mode, implicit $exec bb.1: S_NOP 0, implicit %2.sub1 diff --git a/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir b/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir index 1bac81699edd57..0fa0ddab4e11f5 100644 --- a/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir +++ b/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir @@ -85,10 +85,10 @@ body: | bb.6: successors: %bb.8(0x40000000), %bb.11(0x40000000) %5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - dead %6:vgpr_32 = V_MUL_F32_e32 0, undef %7:vgpr_32, implicit $exec - dead %8:vgpr_32 = V_MUL_F32_e32 0, %2, implicit $exec - undef %9.sub1:vreg_64 = V_MUL_F32_e32 0, %1, implicit $exec - undef %10.sub0:vreg_128 = V_MUL_F32_e32 0, %0, implicit $exec + dead %6:vgpr_32 = V_MUL_F32_e32 0, undef %7:vgpr_32, implicit $mode, implicit $exec + dead %8:vgpr_32 = V_MUL_F32_e32 0, %2, implicit $mode, implicit $exec + undef %9.sub1:vreg_64 = V_MUL_F32_e32 0, %1, implicit $mode, implicit $exec + undef %10.sub0:vreg_128 = V_MUL_F32_e32 0, %0, implicit $mode, implicit $exec undef %11.sub0:sgpr_256 = S_MOV_B32 0 %11.sub1:sgpr_256 = COPY %11.sub0 %11.sub2:sgpr_256 = COPY %11.sub0 @@ -161,31 +161,31 @@ body: | bb.13: successors: %bb.15(0x40000000), %bb.14(0x40000000) - %18:vgpr_32 = V_MAD_F32 0, %10.sub0, 0, target-flags(amdgpu-gotprel) 1073741824, 0, -1082130432, 0, 0, implicit $exec - %19:vgpr_32 = V_MAD_F32 0, %12.sub0, 0, target-flags(amdgpu-gotprel) 0, 0, 0, 0, 0, implicit $exec + %18:vgpr_32 = V_MAD_F32 0, %10.sub0, 0, target-flags(amdgpu-gotprel) 1073741824, 0, -1082130432, 0, 0, implicit $mode, implicit $exec + %19:vgpr_32 = V_MAD_F32 0, %12.sub0, 0, target-flags(amdgpu-gotprel) 0, 0, 0, 0, 0, implicit $mode, implicit $exec %20:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %21:sgpr_128, 1040, 0, 0 :: (dereferenceable invariant load 16) 
- %22:vgpr_32 = V_ADD_F32_e32 0, %19, implicit $exec - %23:vgpr_32 = V_MAD_F32 0, %18, 0, 0, 0, 0, 0, 0, implicit $exec + %22:vgpr_32 = V_ADD_F32_e32 0, %19, implicit $mode, implicit $exec + %23:vgpr_32 = V_MAD_F32 0, %18, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec %24:vgpr_32 = COPY %20.sub3 - %25:vgpr_32 = V_MUL_F32_e64 0, target-flags(amdgpu-gotprel32-lo) 0, 0, %20.sub1, 0, 0, implicit $exec + %25:vgpr_32 = V_MUL_F32_e64 0, target-flags(amdgpu-gotprel32-lo) 0, 0, %20.sub1, 0, 0, implicit $mode, implicit $exec %26:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %27:sgpr_128, 1056, 0, 0 :: (dereferenceable invariant load 16) - %28:vgpr_32 = V_MAD_F32 0, %18, 0, %26.sub0, 0, 0, 0, 0, implicit $exec - %29:vgpr_32 = V_ADD_F32_e32 %28, %19, implicit $exec - %30:vgpr_32 = V_RCP_F32_e32 %29, implicit $exec - %25:vgpr_32 = V_MAC_F32_e32 0, %18, %25, implicit $exec - %31:vgpr_32 = V_MAD_F32 0, target-flags(amdgpu-gotprel) 0, 0, %12.sub0, 0, %24, 0, 0, implicit $exec - %32:vgpr_32 = V_ADD_F32_e32 %25, %31, implicit $exec - %33:vgpr_32 = V_MUL_F32_e32 %22, %30, implicit $exec - %34:vgpr_32 = V_MUL_F32_e32 %23, %30, implicit $exec - %35:vgpr_32 = V_MUL_F32_e32 %32, %30, implicit $exec - %36:vgpr_32 = V_MUL_F32_e32 0, %34, implicit $exec - %36:vgpr_32 = V_MAC_F32_e32 0, %33, %36, implicit $exec - %37:vgpr_32 = V_MAD_F32 0, %35, 0, 0, 0, 0, 0, 0, implicit $exec + %28:vgpr_32 = V_MAD_F32 0, %18, 0, %26.sub0, 0, 0, 0, 0, implicit $mode, implicit $exec + %29:vgpr_32 = V_ADD_F32_e32 %28, %19, implicit $mode, implicit $exec + %30:vgpr_32 = V_RCP_F32_e32 %29, implicit $mode, implicit $exec + %25:vgpr_32 = V_MAC_F32_e32 0, %18, %25, implicit $mode, implicit $exec + %31:vgpr_32 = V_MAD_F32 0, target-flags(amdgpu-gotprel) 0, 0, %12.sub0, 0, %24, 0, 0, implicit $mode, implicit $exec + %32:vgpr_32 = V_ADD_F32_e32 %25, %31, implicit $mode, implicit $exec + %33:vgpr_32 = V_MUL_F32_e32 %22, %30, implicit $mode, implicit $exec + %34:vgpr_32 = V_MUL_F32_e32 %23, %30, implicit $mode, implicit 
$exec + %35:vgpr_32 = V_MUL_F32_e32 %32, %30, implicit $mode, implicit $exec + %36:vgpr_32 = V_MUL_F32_e32 0, %34, implicit $mode, implicit $exec + %36:vgpr_32 = V_MAC_F32_e32 0, %33, %36, implicit $mode, implicit $exec + %37:vgpr_32 = V_MAD_F32 0, %35, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec %38:sreg_64_xexec = V_CMP_NE_U32_e64 0, %5, implicit $exec %39:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %38, implicit $exec V_CMP_NE_U32_e32 1, %39, implicit-def $vcc, implicit $exec $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc - %40:vgpr_32 = V_ADD_F32_e32 %36, %37, implicit $exec + %40:vgpr_32 = V_ADD_F32_e32 %36, %37, implicit $mode, implicit $exec S_CBRANCH_VCCZ %bb.15, implicit $vcc bb.14: @@ -194,9 +194,9 @@ body: | bb.15: successors: %bb.16(0x40000000), %bb.18(0x40000000) - %41:vgpr_32 = V_MAD_F32 0, %40, 0, 0, 0, 0, 0, 0, implicit $exec - %42:sreg_64 = V_CMP_LE_F32_e64 0, 0, 0, %41, 0, implicit $exec - %43:sreg_64 = V_CMP_GE_F32_e64 0, 1065353216, 0, %41, 0, implicit $exec + %41:vgpr_32 = V_MAD_F32 0, %40, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %42:sreg_64 = V_CMP_LE_F32_e64 0, 0, 0, %41, 0, implicit $mode, implicit $exec + %43:sreg_64 = V_CMP_GE_F32_e64 0, 1065353216, 0, %41, 0, implicit $mode, implicit $exec %44:sreg_64 = S_AND_B64 %43, %43, implicit-def dead $scc %45:sreg_64 = S_AND_B64 %42, %42, implicit-def dead $scc %46:sreg_64 = S_AND_B64 %45, %44, implicit-def dead $scc @@ -222,15 +222,15 @@ body: | bb.18: successors: %bb.20(0x40000000), %bb.19(0x40000000) $exec = S_OR_B64 $exec, %47, implicit-def $scc - %52:vgpr_32 = V_MAD_F32 0, %3.sub1, 0, target-flags(amdgpu-gotprel32-lo) 0, 1, %3.sub0, 0, 0, implicit $exec - %53:vgpr_32 = V_MUL_F32_e32 -2147483648, %3.sub1, implicit $exec - %53:vgpr_32 = V_MAC_F32_e32 target-flags(amdgpu-gotprel32-hi) 1065353216, %3.sub2, %53, implicit $exec - %54:vgpr_32 = V_MUL_F32_e32 %53, %53, implicit $exec - %54:vgpr_32 = V_MAC_F32_e32 %52, %52, %54, implicit $exec - %55:vgpr_32 = V_SQRT_F32_e32 %54, 
implicit $exec + %52:vgpr_32 = V_MAD_F32 0, %3.sub1, 0, target-flags(amdgpu-gotprel32-lo) 0, 1, %3.sub0, 0, 0, implicit $mode, implicit $exec + %53:vgpr_32 = V_MUL_F32_e32 -2147483648, %3.sub1, implicit $mode, implicit $exec + %53:vgpr_32 = V_MAC_F32_e32 target-flags(amdgpu-gotprel32-hi) 1065353216, %3.sub2, %53, implicit $mode, implicit $exec + %54:vgpr_32 = V_MUL_F32_e32 %53, %53, implicit $mode, implicit $exec + %54:vgpr_32 = V_MAC_F32_e32 %52, %52, %54, implicit $mode, implicit $exec + %55:vgpr_32 = V_SQRT_F32_e32 %54, implicit $mode, implicit $exec %5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec %56:vgpr_32 = V_MOV_B32_e32 981668463, implicit $exec - %57:sreg_64 = V_CMP_NGT_F32_e64 0, %55, 0, %56, 0, implicit $exec + %57:sreg_64 = V_CMP_NGT_F32_e64 0, %55, 0, %56, 0, implicit $mode, implicit $exec %58:sreg_64 = S_AND_B64 $exec, %57, implicit-def dead $scc $vcc = COPY %58 S_CBRANCH_VCCZ %bb.20, implicit $vcc @@ -255,8 +255,8 @@ body: | bb.23: successors: %bb.22(0x80000000) - undef %60.sub1:vreg_64 = V_CVT_I32_F32_e32 %1, implicit $exec - %60.sub0:vreg_64 = V_CVT_I32_F32_e32 %0, implicit $exec + undef %60.sub1:vreg_64 = V_CVT_I32_F32_e32 %1, implicit $mode, implicit $exec + %60.sub0:vreg_64 = V_CVT_I32_F32_e32 %0, implicit $mode, implicit $exec undef %61.sub0:sgpr_256 = S_MOV_B32 0 %61.sub1:sgpr_256 = COPY %61.sub0 %61.sub2:sgpr_256 = COPY %61.sub0 @@ -266,20 +266,20 @@ body: | %61.sub6:sgpr_256 = COPY %61.sub0 %61.sub7:sgpr_256 = COPY %61.sub0 %62:vgpr_32 = V_MOV_B32_e32 1033100696, implicit $exec - %63:vgpr_32 = V_MUL_F32_e32 1060575065, %15.sub1, implicit $exec - %63:vgpr_32 = V_MAC_F32_e32 1046066128, %15.sub0, %63, implicit $exec + %63:vgpr_32 = V_MUL_F32_e32 1060575065, %15.sub1, implicit $mode, implicit $exec + %63:vgpr_32 = V_MAC_F32_e32 1046066128, %15.sub0, %63, implicit $mode, implicit $exec %64:vgpr_32 = IMAGE_LOAD_V1_V2 %60, %61, 1, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4) - %64:vgpr_32 = 
V_MAC_F32_e32 target-flags(amdgpu-gotprel) 0, %51.sub0, %64, implicit $exec - %65:vgpr_32 = V_MUL_F32_e32 0, %64, implicit $exec - %66:vgpr_32 = V_MUL_F32_e32 0, %65, implicit $exec - %67:vgpr_32 = V_MAD_F32 0, %66, 0, %62, 0, 0, 0, 0, implicit $exec - %63:vgpr_32 = V_MAC_F32_e32 %15.sub2, %62, %63, implicit $exec - %4:vgpr_32 = V_ADD_F32_e32 %63, %67, implicit $exec + %64:vgpr_32 = V_MAC_F32_e32 target-flags(amdgpu-gotprel) 0, %51.sub0, %64, implicit $mode, implicit $exec + %65:vgpr_32 = V_MUL_F32_e32 0, %64, implicit $mode, implicit $exec + %66:vgpr_32 = V_MUL_F32_e32 0, %65, implicit $mode, implicit $exec + %67:vgpr_32 = V_MAD_F32 0, %66, 0, %62, 0, 0, 0, 0, implicit $mode, implicit $exec + %63:vgpr_32 = V_MAC_F32_e32 %15.sub2, %62, %63, implicit $mode, implicit $exec + %4:vgpr_32 = V_ADD_F32_e32 %63, %67, implicit $mode, implicit $exec S_BRANCH %bb.22 bb.24: - %68:vgpr_32 = V_MUL_F32_e32 0, %4, implicit $exec - %69:vgpr_32 = V_CVT_PKRTZ_F16_F32_e64 0, undef %70:vgpr_32, 0, %68, 0, 0, implicit $exec + %68:vgpr_32 = V_MUL_F32_e32 0, %4, implicit $mode, implicit $exec + %69:vgpr_32 = V_CVT_PKRTZ_F16_F32_e64 0, undef %70:vgpr_32, 0, %68, 0, 0, implicit $mode, implicit $exec EXP 0, undef %71:vgpr_32, %69, undef %72:vgpr_32, undef %73:vgpr_32, -1, -1, 15, implicit $exec S_ENDPGM 0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir index b0aeb91787cd2a..343864c4cd6788 100644 --- a/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/twoaddr-fma.mir @@ -1,7 +1,7 @@ # RUN: llc -march=amdgcn -mcpu=gfx1010 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck -check-prefix=GCN %s # GCN-LABEL: name: test_fmamk_reg_imm_f32 -# GCN: V_FMAMK_F32 killed %0.sub0, 1078523331, killed %1, implicit $exec +# GCN: V_FMAMK_F32 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec --- name: test_fmamk_reg_imm_f32 registers: @@ -15,12 +15,12 @@ body: | %0 = IMPLICIT_DEF %1 = COPY %0.sub1 %2 = V_MOV_B32_e32 1078523331, implicit $exec - %3 = V_FMAC_F32_e32 killed %0.sub0, %2, killed %1, implicit $exec + %3 = V_FMAC_F32_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec ... # GCN-LABEL: name: test_fmamk_imm_reg_f32 -# GCN: V_FMAMK_F32 killed %0.sub0, 1078523331, killed %1, implicit $exec +# GCN: V_FMAMK_F32 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec --- name: test_fmamk_imm_reg_f32 registers: @@ -34,12 +34,12 @@ body: | %0 = IMPLICIT_DEF %1 = COPY %0.sub1 %2 = V_MOV_B32_e32 1078523331, implicit $exec - %3 = V_FMAC_F32_e32 %2, killed %0.sub0, killed %1, implicit $exec + %3 = V_FMAC_F32_e32 %2, killed %0.sub0, killed %1, implicit $mode, implicit $exec ... # GCN-LABEL: name: test_fmaak_f32 -# GCN: V_FMAAK_F32 killed %0.sub0, %0.sub1, 1078523331, implicit $exec +# GCN: V_FMAAK_F32 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec --- name: test_fmaak_f32 registers: @@ -51,12 +51,12 @@ body: | %0 = IMPLICIT_DEF %1 = V_MOV_B32_e32 1078523331, implicit $exec - %2 = V_FMAC_F32_e32 killed %0.sub0, %0.sub1, %1, implicit $exec + %2 = V_FMAC_F32_e32 killed %0.sub0, %0.sub1, %1, implicit $mode, implicit $exec ... 
# GCN-LABEL: name: test_fmamk_reg_imm_f16 -# GCN: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $exec +# GCN: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec --- name: test_fmamk_reg_imm_f16 registers: @@ -70,12 +70,12 @@ body: | %0 = IMPLICIT_DEF %1 = COPY %0.sub1 %2 = V_MOV_B32_e32 1078523331, implicit $exec - %3 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $exec + %3 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec ... # GCN-LABEL: name: test_fmamk_imm_reg_f16 -# GCN: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $exec +# GCN: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec --- name: test_fmamk_imm_reg_f16 registers: @@ -89,12 +89,12 @@ body: | %0 = IMPLICIT_DEF %1 = COPY %0.sub1 %2 = V_MOV_B32_e32 1078523331, implicit $exec - %3 = V_FMAC_F16_e32 %2, killed %0.sub0, killed %1, implicit $exec + %3 = V_FMAC_F16_e32 %2, killed %0.sub0, killed %1, implicit $mode, implicit $exec ... # GCN-LABEL: name: test_fmaak_f16 -# GCN: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $exec +# GCN: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec --- name: test_fmaak_f16 registers: @@ -106,11 +106,11 @@ body: | %0 = IMPLICIT_DEF %1 = V_MOV_B32_e32 1078523331, implicit $exec - %2 = V_FMAC_F16_e32 killed %0.sub0, %0.sub1, %1, implicit $exec + %2 = V_FMAC_F16_e32 killed %0.sub0, %0.sub1, %1, implicit $mode, implicit $exec ... # GCN-LABEL: name: test_fmaak_sgpr_src0_f32 -# GCN: %2:vgpr_32 = V_FMAMK_F32 killed %0, 1078523331, %3:vgpr_32, implicit $exec +# GCN: %2:vgpr_32 = V_FMAMK_F32 killed %0, 1078523331, %3:vgpr_32, implicit $mode, implicit $exec --- name: test_fmaak_sgpr_src0_f32 @@ -124,12 +124,12 @@ body: | %0 = IMPLICIT_DEF %1 = V_MOV_B32_e32 1078523331, implicit $exec - %2 = V_FMAC_F32_e32 killed %0, %1, %3, implicit $exec + %2 = V_FMAC_F32_e32 killed %0, %1, %3, implicit $mode, implicit $exec ... 
# GCN-LABEL: name: test_fmaak_inlineimm_src0_f32 -# GCN: %1:vgpr_32 = V_FMAMK_F32 1073741824, 1078523331, %2:vgpr_32, implicit $exec +# GCN: %1:vgpr_32 = V_FMAMK_F32 1073741824, 1078523331, %2:vgpr_32, implicit $mode, implicit $exec --- name: test_fmaak_inlineimm_src0_f32 @@ -141,12 +141,12 @@ body: | bb.0: %0 = V_MOV_B32_e32 1078523331, implicit $exec - %1 = V_FMAC_F32_e32 1073741824, %0, %2, implicit $exec + %1 = V_FMAC_F32_e32 1073741824, %0, %2, implicit $mode, implicit $exec ... # GCN-LABEL: name: test_fmaak_otherimm_src0_f32 -# GCN: %1:vgpr_32 = V_FMAC_F32_e32 1120403456, %0, %1, implicit $exec +# GCN: %1:vgpr_32 = V_FMAC_F32_e32 1120403456, %0, %1, implicit $mode, implicit $exec --- name: test_fmaak_otherimm_src0_f32 @@ -158,12 +158,12 @@ body: | bb.0: %0 = V_MOV_B32_e32 1078523331, implicit $exec - %1 = V_FMAC_F32_e32 1120403456, %0, %2, implicit $exec + %1 = V_FMAC_F32_e32 1120403456, %0, %2, implicit $mode, implicit $exec ... # GCN-LABEL: name: test_fmaak_other_constantlike_src0_f32 -# GCN: %1:vgpr_32 = V_FMAC_F32_e32 %stack.0, %0, %1, implicit $exec +# GCN: %1:vgpr_32 = V_FMAC_F32_e32 %stack.0, %0, %1, implicit $mode, implicit $exec --- name: test_fmaak_other_constantlike_src0_f32 registers: @@ -178,12 +178,12 @@ body: | bb.0: %0 = V_MOV_B32_e32 1078523331, implicit $exec - %1 = V_FMAC_F32_e32 %stack.0, %0, %2, implicit $exec + %1 = V_FMAC_F32_e32 %stack.0, %0, %2, implicit $mode, implicit $exec ... 
# GCN-LABEL: name: test_fmaak_inline_literal_f16 -# GCN: %2:vgpr_32 = V_FMAAK_F16 16384, killed %0, 49664, implicit $exec +# GCN: %2:vgpr_32 = V_FMAAK_F16 16384, killed %0, 49664, implicit $mode, implicit $exec --- name: test_fmaak_inline_literal_f16 @@ -192,11 +192,11 @@ liveins: body: | bb.0: liveins: $vgpr0 - + %3:vgpr_32 = COPY killed $vgpr0 %26:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec - %28:vgpr_32 = V_FMAC_F16_e32 16384, killed %3, %26, implicit $exec + %28:vgpr_32 = V_FMAC_F16_e32 16384, killed %3, %26, implicit $mode, implicit $exec S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-mad.mir b/llvm/test/CodeGen/AMDGPU/twoaddr-mad.mir index e75194bf4b8f07..75949e6a2476e6 100644 --- a/llvm/test/CodeGen/AMDGPU/twoaddr-mad.mir +++ b/llvm/test/CodeGen/AMDGPU/twoaddr-mad.mir @@ -1,7 +1,7 @@ # RUN: llc -march=amdgcn -mcpu=gfx900 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck -check-prefix=GCN %s # GCN-LABEL: name: test_madmk_reg_imm_f32 -# GCN: V_MADMK_F32 killed %0.sub0, 1078523331, killed %1, implicit $exec +# GCN: V_MADMK_F32 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec --- name: test_madmk_reg_imm_f32 registers: @@ -15,12 +15,12 @@ body: | %0 = IMPLICIT_DEF %1 = COPY %0.sub1 %2 = V_MOV_B32_e32 1078523331, implicit $exec - %3 = V_MAC_F32_e32 killed %0.sub0, %2, killed %1, implicit $exec + %3 = V_MAC_F32_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec ... # GCN-LABEL: name: test_madmk_imm_reg_f32 -# GCN: V_MADMK_F32 killed %0.sub0, 1078523331, killed %1, implicit $exec +# GCN: V_MADMK_F32 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec --- name: test_madmk_imm_reg_f32 registers: @@ -34,12 +34,12 @@ body: | %0 = IMPLICIT_DEF %1 = COPY %0.sub1 %2 = V_MOV_B32_e32 1078523331, implicit $exec - %3 = V_MAC_F32_e32 %2, killed %0.sub0, killed %1, implicit $exec + %3 = V_MAC_F32_e32 %2, killed %0.sub0, killed %1, implicit $mode, implicit $exec ... 
# GCN-LABEL: name: test_madak_f32 -# GCN: V_MADAK_F32 killed %0.sub0, %0.sub1, 1078523331, implicit $exec +# GCN: V_MADAK_F32 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec --- name: test_madak_f32 registers: @@ -51,12 +51,12 @@ body: | %0 = IMPLICIT_DEF %1 = V_MOV_B32_e32 1078523331, implicit $exec - %2 = V_MAC_F32_e32 killed %0.sub0, %0.sub1, %1, implicit $exec + %2 = V_MAC_F32_e32 killed %0.sub0, %0.sub1, %1, implicit $mode, implicit $exec ... # GCN-LABEL: name: test_madmk_reg_imm_f16 -# GCN: V_MADMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $exec +# GCN: V_MADMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec --- name: test_madmk_reg_imm_f16 registers: @@ -70,12 +70,12 @@ body: | %0 = IMPLICIT_DEF %1 = COPY %0.sub1 %2 = V_MOV_B32_e32 1078523331, implicit $exec - %3 = V_MAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $exec + %3 = V_MAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec ... # GCN-LABEL: name: test_madmk_imm_reg_f16 -# GCN: V_MADMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $exec +# GCN: V_MADMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec --- name: test_madmk_imm_reg_f16 registers: @@ -89,12 +89,12 @@ body: | %0 = IMPLICIT_DEF %1 = COPY %0.sub1 %2 = V_MOV_B32_e32 1078523331, implicit $exec - %3 = V_MAC_F16_e32 %2, killed %0.sub0, killed %1, implicit $exec + %3 = V_MAC_F16_e32 %2, killed %0.sub0, killed %1, implicit $mode, implicit $exec ... # GCN-LABEL: name: test_madak_f16 -# GCN: V_MADAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $exec +# GCN: V_MADAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec --- name: test_madak_f16 registers: @@ -106,14 +106,14 @@ body: | %0 = IMPLICIT_DEF %1 = V_MOV_B32_e32 1078523331, implicit $exec - %2 = V_MAC_F16_e32 killed %0.sub0, %0.sub1, %1, implicit $exec + %2 = V_MAC_F16_e32 killed %0.sub0, %0.sub1, %1, implicit $mode, implicit $exec ... 
# Make sure constant bus restriction isn't violated if src0 is an SGPR. # GCN-LABEL: name: test_madak_sgpr_src0_f32 # GCN: %1:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec -# GCN: %2:vgpr_32 = V_MAD_F32 0, killed %0, 0, %1, 0, %3:vgpr_32, 0, 0, implicit $exec +# GCN: %2:vgpr_32 = V_MAD_F32 0, killed %0, 0, %1, 0, %3:vgpr_32, 0, 0, implicit $mode, implicit $exec --- name: test_madak_sgpr_src0_f32 @@ -127,14 +127,14 @@ body: | %0 = IMPLICIT_DEF %1 = V_MOV_B32_e32 1078523331, implicit $exec - %2 = V_MAC_F32_e32 killed %0, %1, %3, implicit $exec + %2 = V_MAC_F32_e32 killed %0, %1, %3, implicit $mode, implicit $exec ... # This can still fold if this is an inline immediate. # GCN-LABEL: name: test_madak_inlineimm_src0_f32 -# GCN: %1:vgpr_32 = V_MADMK_F32 1073741824, 1078523331, %2:vgpr_32, implicit $exec +# GCN: %1:vgpr_32 = V_MADMK_F32 1073741824, 1078523331, %2:vgpr_32, implicit $mode, implicit $exec --- name: test_madak_inlineimm_src0_f32 @@ -146,13 +146,13 @@ body: | bb.0: %0 = V_MOV_B32_e32 1078523331, implicit $exec - %1 = V_MAC_F32_e32 1073741824, %0, %2, implicit $exec + %1 = V_MAC_F32_e32 1073741824, %0, %2, implicit $mode, implicit $exec ... # Non-inline immediate uses constant bus already. # GCN-LABEL: name: test_madak_otherimm_src0_f32 -# GCN: %1:vgpr_32 = V_MAC_F32_e32 1120403456, %0, %1, implicit $exec +# GCN: %1:vgpr_32 = V_MAC_F32_e32 1120403456, %0, %1, implicit $mode, implicit $exec --- name: test_madak_otherimm_src0_f32 @@ -164,13 +164,13 @@ body: | bb.0: %0 = V_MOV_B32_e32 1078523331, implicit $exec - %1 = V_MAC_F32_e32 1120403456, %0, %2, implicit $exec + %1 = V_MAC_F32_e32 1120403456, %0, %2, implicit $mode, implicit $exec ... # Non-inline immediate uses constant bus already. 
# GCN-LABEL: name: test_madak_other_constantlike_src0_f32 -# GCN: %1:vgpr_32 = V_MAC_F32_e32 %stack.0, %0, %1, implicit $exec +# GCN: %1:vgpr_32 = V_MAC_F32_e32 %stack.0, %0, %1, implicit $mode, implicit $exec --- name: test_madak_other_constantlike_src0_f32 registers: @@ -185,12 +185,12 @@ body: | bb.0: %0 = V_MOV_B32_e32 1078523331, implicit $exec - %1 = V_MAC_F32_e32 %stack.0, %0, %2, implicit $exec + %1 = V_MAC_F32_e32 %stack.0, %0, %2, implicit $mode, implicit $exec ... # GCN-LABEL: name: test_madak_inline_literal_f16 -# GCN: %2:vgpr_32 = V_MADAK_F16 16384, killed %0, 49664, implicit $exec +# GCN: %2:vgpr_32 = V_MADAK_F16 16384, killed %0, 49664, implicit $mode, implicit $exec --- name: test_madak_inline_literal_f16 @@ -199,11 +199,11 @@ liveins: body: | bb.0: liveins: $vgpr0 - + %3:vgpr_32 = COPY killed $vgpr0 %26:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec - %28:vgpr_32 = V_MAC_F16_e32 16384, killed %3, %26, implicit $exec + %28:vgpr_32 = V_MAC_F16_e32 16384, killed %3, %26, implicit $mode, implicit $exec S_ENDPGM 0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir b/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir index 9f36e0b5d68543..3190641ae69105 100644 --- a/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir +++ b/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir @@ -66,7 +66,7 @@ body: | # GCN-LABEL: name: swap_phys_overlap_x # GCN: bb.0: # GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec -# GCN-NEXT: $vgpr3_vgpr4 = V_ADD_F64 0, $vgpr0_vgpr1, 0, $vgpr3_vgpr4, 0, 0, implicit $exec +# GCN-NEXT: $vgpr3_vgpr4 = V_ADD_F64 0, $vgpr0_vgpr1, 0, $vgpr3_vgpr4, 0, 0, implicit $mode, implicit $exec # GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec # GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec --- @@ -74,7 +74,7 @@ name: swap_phys_overlap_x body: | bb.0: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr3_vgpr4 = V_ADD_F64 0, $vgpr0_vgpr1, 0, $vgpr3_vgpr4, 0, 0, implicit $exec + $vgpr3_vgpr4 = V_ADD_F64 0, $vgpr0_vgpr1, 0, $vgpr3_vgpr4, 0, 0, implicit $mode, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec ... 
diff --git a/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir b/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir index 20c21aae6c7599..55453c7d263ab6 100644 --- a/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir +++ b/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir @@ -17,7 +17,7 @@ body: | $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0 $sgpr7 = S_MOV_B32 61440 $sgpr6 = S_MOV_B32 -1 - $vcc = V_CMP_EQ_F32_e64 0, 0, 0, undef $sgpr2, 0, implicit $exec + $vcc = V_CMP_EQ_F32_e64 0, 0, 0, undef $sgpr2, 0, implicit $mode, implicit $exec S_CBRANCH_VCCZ %bb.1, implicit killed $vcc bb.2: diff --git a/llvm/test/CodeGen/AMDGPU/vcmpx-permlane-hazard.mir b/llvm/test/CodeGen/AMDGPU/vcmpx-permlane-hazard.mir index 5b98a82216d7f1..96b6eaf1967d58 100644 --- a/llvm/test/CodeGen/AMDGPU/vcmpx-permlane-hazard.mir +++ b/llvm/test/CodeGen/AMDGPU/vcmpx-permlane-hazard.mir @@ -117,7 +117,7 @@ body: | $vgpr1 = IMPLICIT_DEF $sgpr0 = IMPLICIT_DEF $sgpr1 = IMPLICIT_DEF - $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $exec + $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec $vgpr1 = V_PERMLANE16_B32 0, killed $vgpr1, 0, killed $sgpr1, 0, killed $sgpr0, $vgpr1, 0, implicit $exec S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir b/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir index 1f30d91753a5f0..761ef6054b81ba 100644 --- a/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir +++ b/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir @@ -94,7 +94,7 @@ body: | $sgpr4 = IMPLICIT_DEF $vgpr0 = IMPLICIT_DEF $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec - $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $exec + $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec $sgpr0 = S_MOV_B32 0 ... 
# GCN-LABEL: name: vmem_swait0_write_sgpr diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir index e3b56d3f9619a5..0e6aecb7ad7063 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir @@ -3,7 +3,7 @@ # GCN-LABEL: waitcnt-back-edge-loop # GCN: bb.2 # GCN: S_WAITCNT 112 -# GCN: $vgpr5 = V_CVT_I32_F32_e32 killed $vgpr5, implicit $exec +# GCN: $vgpr5 = V_CVT_I32_F32_e32 killed $vgpr5, implicit $mode, implicit $exec --- name: waitcnt-back-edge-loop @@ -28,7 +28,7 @@ body: | bb.1: successors: %bb.5, %bb.2 - $vgpr5 = V_CVT_I32_F32_e32 killed $vgpr5, implicit $exec + $vgpr5 = V_CVT_I32_F32_e32 killed $vgpr5, implicit $mode, implicit $exec V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc S_CBRANCH_VCCZ %bb.5, implicit killed $vcc @@ -44,7 +44,7 @@ body: | successors: %bb.3, %bb.1 $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1) - $vgpr4 = V_CVT_I32_F32_e32 $vgpr5, implicit $exec + $vgpr4 = V_CVT_I32_F32_e32 $vgpr5, implicit $mode, implicit $exec V_CMP_EQ_U32_e32 2, killed $vgpr4, implicit-def $vcc, implicit $exec $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc $vgpr4 = V_MOV_B32_e32 $vgpr5, implicit $exec, implicit $exec @@ -53,7 +53,7 @@ body: | bb.5: - $vgpr4 = V_MAC_F32_e32 killed $vgpr0, killed $vgpr3, killed $vgpr4, implicit $exec + $vgpr4 = V_MAC_F32_e32 killed $vgpr0, killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec EXP_DONE 12, killed $vgpr4, undef $vgpr0, undef $vgpr0, undef $vgpr0, 0, 0, 15, implicit $exec S_ENDPGM 0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-overflow.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-overflow.mir index eae38031047f6e..e06e3031c3d2aa 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt-overflow.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-overflow.mir @@ -16,21 +16,21 @@ body: | ; GFX9: $vgpr34_vgpr35 = DS_READ2_B32_gfx9 renamable $vgpr99, 34, 35, 0, implicit $exec ; GFX9-NOT: S_WAITCNT 53119 ; GFX9-NEXT: S_WAITCNT 52863 - ; GFX9-NEXT: $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $exec - ; GFX9-NEXT: $vgpr2 = V_MAC_F32_e32 0, $vgpr3, $vgpr2, implicit $exec - ; GFX9-NEXT: $vgpr4 = V_MAC_F32_e32 0, $vgpr5, $vgpr4, implicit $exec - ; GFX9-NEXT: $vgpr6 = V_MAC_F32_e32 0, $vgpr7, $vgpr6, implicit $exec + ; GFX9-NEXT: $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec + ; GFX9-NEXT: $vgpr2 = V_MAC_F32_e32 0, $vgpr3, $vgpr2, implicit $mode, implicit $exec + ; GFX9-NEXT: $vgpr4 = V_MAC_F32_e32 0, $vgpr5, $vgpr4, implicit $mode, implicit $exec + ; GFX9-NEXT: $vgpr6 = V_MAC_F32_e32 0, $vgpr7, $vgpr6, implicit $mode, implicit $exec ; GFX9-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: max-counter-lgkmcnt ; GFX10: $vgpr34_vgpr35 = DS_READ2_B32_gfx9 renamable $vgpr99, 34, 35, 0, implicit $exec ; GFX10-NEXT: S_WAITCNT 53631 - ; GFX10-NEXT: $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $exec + ; GFX10-NEXT: $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec ; GFX10-NEXT: S_WAITCNT 53375 - ; GFX10-NEXT: $vgpr2 = V_MAC_F32_e32 0, $vgpr3, $vgpr2, implicit $exec + ; GFX10-NEXT: $vgpr2 = V_MAC_F32_e32 0, $vgpr3, $vgpr2, implicit $mode, implicit $exec ; GFX10-NEXT: S_WAITCNT 53119 - ; GFX10-NEXT: $vgpr4 = V_MAC_F32_e32 0, $vgpr5, $vgpr4, implicit $exec + ; GFX10-NEXT: $vgpr4 = V_MAC_F32_e32 0, $vgpr5, $vgpr4, implicit $mode, implicit $exec ; GFX10-NEXT: S_WAITCNT 52863 - ; GFX10-NEXT: $vgpr6 = V_MAC_F32_e32 0, $vgpr7, $vgpr6, implicit $exec + ; GFX10-NEXT: $vgpr6 = V_MAC_F32_e32 0, $vgpr7, $vgpr6, implicit $mode, implicit $exec ; 
GFX10-NEXT: S_ENDPGM 0 $vgpr0_vgpr1 = DS_READ2_B32_gfx9 renamable $vgpr99, 0, 1, 0, implicit $exec $vgpr2_vgpr3 = DS_READ2_B32_gfx9 renamable $vgpr99, 2, 3, 0, implicit $exec @@ -50,10 +50,10 @@ body: | $vgpr30_vgpr31 = DS_READ2_B32_gfx9 renamable $vgpr99, 30, 31, 0, implicit $exec $vgpr32_vgpr33 = DS_READ2_B32_gfx9 renamable $vgpr99, 32, 33, 0, implicit $exec $vgpr34_vgpr35 = DS_READ2_B32_gfx9 renamable $vgpr99, 34, 35, 0, implicit $exec - $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $exec - $vgpr2 = V_MAC_F32_e32 0, $vgpr3, $vgpr2, implicit $exec - $vgpr4 = V_MAC_F32_e32 0, $vgpr5, $vgpr4, implicit $exec - $vgpr6 = V_MAC_F32_e32 0, $vgpr7, $vgpr6, implicit $exec + $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec + $vgpr2 = V_MAC_F32_e32 0, $vgpr3, $vgpr2, implicit $mode, implicit $exec + $vgpr4 = V_MAC_F32_e32 0, $vgpr5, $vgpr4, implicit $mode, implicit $exec + $vgpr6 = V_MAC_F32_e32 0, $vgpr7, $vgpr6, implicit $mode, implicit $exec S_ENDPGM 0 ... @@ -70,10 +70,10 @@ body: | ; GFX10-NOT: S_WAITCNT 65407 ; GFX9-NEXT: S_WAITCNT 53118 ; GFX10-NEXT: S_WAITCNT 65406 - ; GFX9_10-NEXT: $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $exec - ; GFX9_10-NEXT: $vgpr1 = V_MAC_F32_e32 0, $vgpr2, $vgpr1, implicit $exec - ; GFX9_10-NEXT: $vgpr2 = V_MAC_F32_e32 0, $vgpr3, $vgpr2, implicit $exec - ; GFX9_10-NEXT: $vgpr3 = V_MAC_F32_e32 0, $vgpr4, $vgpr3, implicit $exec + ; GFX9_10-NEXT: $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec + ; GFX9_10-NEXT: $vgpr1 = V_MAC_F32_e32 0, $vgpr2, $vgpr1, implicit $mode, implicit $exec + ; GFX9_10-NEXT: $vgpr2 = V_MAC_F32_e32 0, $vgpr3, $vgpr2, implicit $mode, implicit $exec + ; GFX9_10-NEXT: $vgpr3 = V_MAC_F32_e32 0, $vgpr4, $vgpr3, implicit $mode, implicit $exec ; GFX9_10-NEXT: S_ENDPGM 0 $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, 0, 0, implicit 
$exec @@ -142,10 +142,10 @@ body: | $vgpr64 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 256, 0, 0, 0, 0, 0, implicit $exec $vgpr65 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 260, 0, 0, 0, 0, 0, implicit $exec $vgpr66 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 264, 0, 0, 0, 0, 0, implicit $exec - $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $exec - $vgpr1 = V_MAC_F32_e32 0, $vgpr2, $vgpr1, implicit $exec - $vgpr2 = V_MAC_F32_e32 0, $vgpr3, $vgpr2, implicit $exec - $vgpr3 = V_MAC_F32_e32 0, $vgpr4, $vgpr3, implicit $exec + $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec + $vgpr1 = V_MAC_F32_e32 0, $vgpr2, $vgpr1, implicit $mode, implicit $exec + $vgpr2 = V_MAC_F32_e32 0, $vgpr3, $vgpr2, implicit $mode, implicit $exec + $vgpr3 = V_MAC_F32_e32 0, $vgpr4, $vgpr3, implicit $mode, implicit $exec S_ENDPGM 0 ... @@ -167,6 +167,6 @@ body: | EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec - $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $exec + $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-permute.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-permute.mir index 7a70ae5464dd41..2f453c6156b2eb 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt-permute.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-permute.mir @@ -15,7 +15,7 @@ body: | liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 $vgpr0 = DS_BPERMUTE_B32 killed $vgpr0, killed $vgpr1, 0, implicit $exec - $vgpr0 = V_ADD_F32_e32 1065353216, killed $vgpr0, implicit $exec + $vgpr0 = V_ADD_F32_e32 1065353216, killed $vgpr0, implicit $mode, implicit $exec S_SETPC_B64_return killed $sgpr30_sgpr31, implicit killed $vgpr0 ... 
diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll index c31c6694cb24c4..f137f715ee420d 100644 --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -15,6 +15,7 @@ ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Loop Pass Manager +; CHECK-NEXT: Canonicalize Freeze Instructions in Loops ; CHECK-NEXT: Induction Variable Users ; CHECK-NEXT: Loop Strength Reduction ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) diff --git a/llvm/test/CodeGen/ARM/i64_volatile_load_store.ll b/llvm/test/CodeGen/ARM/i64_volatile_load_store.ll new file mode 100644 index 00000000000000..43479b7e541b70 --- /dev/null +++ b/llvm/test/CodeGen/ARM/i64_volatile_load_store.ll @@ -0,0 +1,191 @@ +; RUN: llc -mtriple=armv5e-arm-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK-ARMV5TE,CHECK +; RUN: llc -mtriple=thumbv6t2-arm-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK-T2,CHECK +; RUN: llc -mtriple=armv4t-arm-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK-ARMV4T,CHECK + +@x = common dso_local global i64 0, align 8 +@y = common dso_local global i64 0, align 8 + +define void @test() { +entry: +; CHECK-LABEL: test: +; CHECK-ARMV5TE: ldr [[ADDR0:r[0-9]+]] +; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]] +; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]] +; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]] +; CHECK-T2: movw [[ADDR0:r[0-9]+]], :lower16:x +; CHECK-T2-NEXT: movw [[ADDR1:r[0-9]+]], :lower16:y +; CHECK-T2-NEXT: movt [[ADDR0]], :upper16:x +; CHECK-T2-NEXT: movt [[ADDR1]], :upper16:y +; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]] +; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]] +; CHECK-ARMV4T: ldr [[ADDR0:r[0-9]+]] +; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]] +; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]]] +; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #4] +; 
CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #4] +; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]]] + %0 = load volatile i64, i64* @x, align 8 + store volatile i64 %0, i64* @y, align 8 + ret void +} + +define void @test_offset() { +entry: +; CHECK-LABEL: test_offset: +; CHECK-ARMV5TE: ldr [[ADDR0:r[0-9]+]] +; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]] +; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #-4] +; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], #-4] +; CHECK-T2: movw [[ADDR0:r[0-9]+]], :lower16:x +; CHECK-T2-NEXT: movw [[ADDR1:r[0-9]+]], :lower16:y +; CHECK-T2-NEXT: movt [[ADDR0]], :upper16:x +; CHECK-T2-NEXT: movt [[ADDR1]], :upper16:y +; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #-4] +; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], #-4] +; CHECK-ARMV4T: ldr [[ADDR0:r[0-9]+]] +; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]] +; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #-4] +; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]]] +; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]]] +; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #-4] + %0 = load volatile i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @x to i8*), i32 -4) to i64*), align 8 + store volatile i64 %0, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @y to i8*), i32 -4) to i64*), align 8 + ret void +} + +define void @test_offset_1() { +; CHECK-LABEL: test_offset_1: +; CHECK-ARMV5TE: ldr [[ADDR0:r[0-9]+]] +; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]] +; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #255] +; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], #255] +; CHECK-T2: adds [[ADDR0:r[0-9]+]], #255 +; CHECK-T2-NEXT: adds [[ADDR1:r[0-9]+]], #255 +; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]] +; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]] +; CHECK-ARMV4T: ldr [[ADDR0:r[0-9]+]] +; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]] +; 
CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #255] +; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #259] +; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]], #259] +; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #255] +entry: + %0 = load volatile i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @x to i8*), i32 255) to i64*), align 8 + store volatile i64 %0, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @y to i8*), i32 255) to i64*), align 8 + ret void +} + +define void @test_offset_2() { +; CHECK-LABEL: test_offset_2: +; CHECK-ARMV5TE: ldr [[ADDR0:r[0-9]+]] +; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]] +; CHECK-ARMV5TE-NEXT: add [[ADDR0]], [[ADDR0]], #256 +; CHECK-ARMV5TE-NEXT: add [[ADDR1]], [[ADDR1]], #256 +; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]] +; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]] +; CHECK-T2: movw [[ADDR0:r[0-9]+]], :lower16:x +; CHECK-T2-NEXT: movw [[ADDR1:r[0-9]+]], :lower16:y +; CHECK-T2-NEXT: movt [[ADDR0]], :upper16:x +; CHECK-T2-NEXT: movt [[ADDR1]], :upper16:y +; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #256] +; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], #256] +; CHECK-ARMV4T: ldr [[ADDR0:r[0-9]+]] +; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]] +; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #256] +; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #260] +; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]], #260] +; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #256] +entry: + %0 = load volatile i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @x to i8*), i32 256) to i64*), align 8 + store volatile i64 %0, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @y to i8*), i32 256) to i64*), align 8 + ret void +} + +define void @test_offset_3() { +; CHECK-LABEL: test_offset_3: +; CHECK-ARMV5TE: ldr [[ADDR0:r[0-9]+]] +; CHECK-ARMV5TE-NEXT: ldr [[ADDR1:r[0-9]+]] +; CHECK-ARMV5TE-NEXT: add 
[[ADDR0]], [[ADDR0]], #1020 +; CHECK-ARMV5TE-NEXT: add [[ADDR1]], [[ADDR1]], #1020 +; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]] +; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]] +; CHECK-T2: movw [[ADDR0:r[0-9]+]], :lower16:x +; CHECK-T2-NEXT: movw [[ADDR1:r[0-9]+]], :lower16:y +; CHECK-T2-NEXT: movt [[ADDR0]], :upper16:x +; CHECK-T2-NEXT: movt [[ADDR1]], :upper16:y +; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #1020] +; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]], #1020] +; CHECK-ARMV4T: ldr [[ADDR0:r[0-9]+]] +; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]] +; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #1020] +; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #1024] +; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]], #1024] +; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #1020] +entry: + %0 = load volatile i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @x to i8*), i32 1020) to i64*), align 8 + store volatile i64 %0, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @y to i8*), i32 1020) to i64*), align 8 + ret void +} + +define void @test_offset_4() { +; CHECK-LABEL: test_offset_4: +; CHECK-ARMV5TE: ldr [[ADDR0:r[0-9]+]] +; CHECK-ARMV5TE: ldr [[ADDR1:r[0-9]+]] +; CHECK-ARMV5TE-NEXT: add [[ADDR0]], [[ADDR0]], #1024 +; CHECK-ARMV5TE-NEXT: add [[ADDR1]], [[ADDR1]], #1024 +; CHECK-ARMV5TE-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]] +; CHECK-ARMV5TE-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]] +; CHECK-T2: movw [[ADDR1:r[0-9]+]], :lower16:y +; CHECK-T2-NEXT: movw [[ADDR0:r[0-9]+]], :lower16:x +; CHECK-T2-NEXT: movt [[ADDR1]], :upper16:y +; CHECK-T2-NEXT: movt [[ADDR0]], :upper16:x +; CHECK-T2-NEXT: add.w [[ADDR0]], [[ADDR0]], #1024 +; CHECK-T2-NEXT: add.w [[ADDR1]], [[ADDR1]], #1024 +; CHECK-T2-NEXT: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]] +; CHECK-T2-NEXT: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]] +; CHECK-ARMV4T: ldr 
[[ADDR0:r[0-9]+]] +; CHECK-ARMV4T-NEXT: ldr [[ADDR1:r[0-9]+]] +; CHECK-ARMV4T-NEXT: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #1024] +; CHECK-ARMV4T-NEXT: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]], #1028] +; CHECK-ARMV4T-NEXT: str [[R1]], {{\[}}[[ADDR1]], #1028] +; CHECK-ARMV4T-NEXT: str [[R0]], {{\[}}[[ADDR1]], #1024] +entry: + %0 = load volatile i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @x to i8*), i32 1024) to i64*), align 8 + store volatile i64 %0, i64* bitcast (i8* getelementptr (i8, i8* bitcast (i64* @y to i8*), i32 1024) to i64*), align 8 + ret void +} + +define i64 @test_stack() { +; CHECK-LABEL: test_stack: +; CHECK-ARMV5TE: sub sp, sp, #80 +; CHECK-ARMV5TE-NEXT: mov [[R0:r[0-9]+]], #0 +; CHECK-ARMV5TE-NEXT: mov [[R1:r[0-9]+]], #1 +; CHECK-ARMV5TE-NEXT: strd [[R1]], [[R0]], [sp, #8] +; CHECK-ARMV5TE-NEXT: ldrd r0, r1, [sp, #8] +; CHECK-ARMV5TE-NEXT: add sp, sp, #80 +; CHECK-ARMV5TE-NEXT: bx lr +; CHECK-T2: sub sp, #80 +; CHECK-T2-NEXT: movs [[R0:r[0-9]+]], #0 +; CHECK-T2-NEXT: movs [[R1:r[0-9]+]], #1 +; CHECK-T2-NEXT: strd [[R1]], [[R0]], [sp, #8] +; CHECK-T2-NEXT: ldrd r0, r1, [sp, #8] +; CHECK-T2-NEXT: add sp, #80 +; CHECK-T2-NEXT: bx lr +; CHECK-ARMV4T: sub sp, sp, #80 +; CHECK-ARMV4T-NEXT: mov [[R0:r[0-9]+]], #0 +; CHECK-ARMV4T-NEXT: str [[R0]], [sp, #12] +; CHECK-ARMV4T-NEXT: mov [[R1:r[0-9]+]], #1 +; CHECK-ARMV4T-NEXT: str [[R1]], [sp, #8] +; CHECK-ARMV4T-NEXT: ldr r0, [sp, #8] +; CHECK-ARMV4T-NEXT: ldr r1, [sp, #12] +; CHECK-ARMV4T-NEXT: add sp, sp, #80 +; CHECK-ARMV4T-NEXT: bx lr +entry: + %a = alloca [10 x i64], align 8 + %arrayidx = getelementptr inbounds [10 x i64], [10 x i64]* %a, i32 0, i32 1 + store volatile i64 1, i64* %arrayidx, align 8 + %arrayidx1 = getelementptr inbounds [10 x i64], [10 x i64]* %a, i32 0, i32 1 + %0 = load volatile i64, i64* %arrayidx1, align 8 + ret i64 %0 +} + diff --git a/llvm/test/CodeGen/BPF/32-bit-subreg-cond-select.ll b/llvm/test/CodeGen/BPF/32-bit-subreg-cond-select.ll index 160be56c30a33a..e8a4f81a0240ea 
100644 --- a/llvm/test/CodeGen/BPF/32-bit-subreg-cond-select.ll +++ b/llvm/test/CodeGen/BPF/32-bit-subreg-cond-select.ll @@ -56,8 +56,9 @@ entry: ret i32 %c.d } ; CHECK-LABEL: select_cc_32 -; CHECK: r{{[0-9]+}} <<= 32 -; CHECK-NEXT: r{{[0-9]+}} >>= 32 +; CHECK: r{{[0-9]+}} = w{{[0-9]+}} +; CHECK-NOT: r{{[0-9]+}} <<= 32 +; CHECK-NOT: r{{[0-9]+}} >>= 32 ; Function Attrs: norecurse nounwind readnone define dso_local i64 @select_cc_32_64(i32 %a, i32 %b, i64 %c, i64 %d) local_unnamed_addr #0 { @@ -67,8 +68,9 @@ entry: ret i64 %c.d } ; CHECK-LABEL: select_cc_32_64 -; CHECK: r{{[0-9]+}} <<= 32 -; CHECK-NEXT: r{{[0-9]+}} >>= 32 +; CHECK: r{{[0-9]+}} = w{{[0-9]+}} +; CHECK-NOT: r{{[0-9]+}} <<= 32 +; CHECK-NOT: r{{[0-9]+}} >>= 32 ; Function Attrs: norecurse nounwind readnone define dso_local i32 @select_cc_64_32(i64 %a, i64 %b, i32 %c, i32 %d) local_unnamed_addr #0 { @@ -88,8 +90,9 @@ entry: ret i32 %c.d } ; CHECK-LABEL: selecti_cc_32 -; CHECK: r{{[0-9]+}} <<= 32 -; CHECK-NEXT: r{{[0-9]+}} >>= 32 +; CHECK: r{{[0-9]+}} = w{{[0-9]+}} +; CHECK-NOT: r{{[0-9]+}} <<= 32 +; CHECK-NOT: r{{[0-9]+}} >>= 32 ; Function Attrs: norecurse nounwind readnone define dso_local i64 @selecti_cc_32_64(i32 %a, i64 %c, i64 %d) local_unnamed_addr #0 { @@ -99,8 +102,9 @@ entry: ret i64 %c.d } ; CHECK-LABEL: selecti_cc_32_64 -; CHECK: r{{[0-9]+}} <<= 32 -; CHECK-NEXT: r{{[0-9]+}} >>= 32 +; CHECK: r{{[0-9]+}} = w{{[0-9]+}} +; CHECK-NOT: r{{[0-9]+}} <<= 32 +; CHECK-NOT: r{{[0-9]+}} >>= 32 ; Function Attrs: norecurse nounwind readnone define dso_local i32 @selecti_cc_64_32(i64 %a, i32 %c, i32 %d) local_unnamed_addr #0 { diff --git a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-1.ll b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-1.ll index 5a72f59593c68f..2fc1e6c2783b32 100644 --- a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-1.ll +++ b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-1.ll @@ -27,7 +27,7 @@ entry: %call = tail call i32 @helper(i32 %conv) ret i32 %call } -; CHECK: r{{[0-9]+}} 
>>= 32 +; CHECK: r{{[0-9]+}} = w{{[0-9]+}} ; CHECK-NOT: r{{[0-9]+}} >>= 32 ; CHECK: if r{{[0-9]+}} == r{{[0-9]+}} goto diff --git a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-2.ll b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-2.ll index 46a1b231c1f0a8..da69657d02d009 100644 --- a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-2.ll +++ b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-2.ll @@ -27,8 +27,8 @@ entry: %call = tail call i32 @helper(i32 %conv) ret i32 %call } -; CHECK: r{{[0-9]+}} >>= 32 -; CHECK: r{{[0-9]+}} >>= 32 +; CHECK: r{{[0-9]+}} = w{{[0-9]+}} +; CHECK: r{{[0-9]+}} = w{{[0-9]+}} ; CHECK: if r{{[0-9]+}} == r{{[0-9]+}} goto declare dso_local i32 @helper(i32) local_unnamed_addr diff --git a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-3.ll b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-3.ll index d46214032e6eee..3f3f9c8c4a55fa 100644 --- a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-3.ll +++ b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-3.ll @@ -44,8 +44,9 @@ for.body: ; preds = %for.body, %entry %exitcond = icmp eq i64 %inc, 100 br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !2 } -; CHECK: [[VAL:r[0-9]+]] <<= 32 -; CHECK: [[VAL]] >>= 32 +; CHECK: [[VAL:r[0-9]+]] = w{{[0-9]+}} +; CHECK-NOT: [[VAL:r[0-9]+]] <<= 32 +; CHECK-NOT: [[VAL]] >>= 32 ; CHECK: if [[VAL]] == 0 goto !2 = distinct !{!2, !3} diff --git a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll index 63a7c25ed33b7d..7c5be7f1987a36 100644 --- a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll +++ b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll @@ -47,8 +47,9 @@ define dso_local i64 @select_u(i32 %a, i32 %b, i64 %c, i64 %d) local_unnamed_add entry: %cmp = icmp ugt i32 %a, %b %c.d = select i1 %cmp, i64 %c, i64 %d -; CHECK: r{{[0-9]+}} <<= 32 -; CHECK-NEXT: r{{[0-9]+}} >>= 32 +; CHECK: r{{[0-9]+}} = w{{[0-9]+}} +; CHECK-NOT: r{{[0-9]+}} <<= 32 +; CHECK-NOT: r{{[0-9]+}} >>= 32 ; CHECK: if 
r{{[0-9]+}} {{<|>}} r{{[0-9]+}} goto ret i64 %c.d } @@ -58,8 +59,9 @@ define dso_local i64 @select_u_2(i32 %a, i64 %b, i64 %c, i64 %d) local_unnamed_a ; CHECK-LABEL: select_u_2: entry: %conv = zext i32 %a to i64 -; CHECK: r{{[0-9]+}} <<= 32 -; CHECK-NEXT: r{{[0-9]+}} >>= 32 +; CHECK: r{{[0-9]+}} = w{{[0-9]+}} +; CHECK-NOT: r{{[0-9]+}} <<= 32 +; CHECK-NOT: r{{[0-9]+}} >>= 32 %cmp = icmp ugt i64 %conv, %b %c.d = select i1 %cmp, i64 %c, i64 %d ret i64 %c.d @@ -84,10 +86,11 @@ entry: %call = tail call i64 bitcast (i64 (...)* @bar to i64 ()*)() #2 %conv = trunc i64 %call to i32 %cmp = icmp ult i32 %conv, 10 -; The shifts can't be optimized out because %call comes from function call -; returning i64 so the high bits might be valid. -; CHECK: r{{[0-9]+}} <<= 32 -; CHECK-NEXT: r{{[0-9]+}} >>= 32 +; %call comes from function call returning i64 so the high bits will need +; to be cleared. +; CHECK: r{{[0-9]+}} = w{{[0-9]+}} +; CHECK-NOT: r{{[0-9]+}} <<= 32 +; CHECK-NOT: r{{[0-9]+}} >>= 32 %b.c = select i1 %cmp, i32 %b, i32 %c ; CHECK: if r{{[0-9]+}} {{<|>}} {{[0-9]+}} goto ret i32 %b.c @@ -100,8 +103,9 @@ define dso_local i32* @inc_p(i32* readnone %p, i32 %a) local_unnamed_addr #0 { ; CHECK-LABEL: inc_p: entry: %idx.ext = zext i32 %a to i64 -; CHECK: r{{[0-9]+}} <<= 32 -; CHECK-NEXT: r{{[0-9]+}} >>= 32 +; CHECK: r{{[0-9]+}} = w{{[0-9]+}} +; CHECK-NOT: r{{[0-9]+}} <<= 32 +; CHECK-NOT: r{{[0-9]+}} >>= 32 %add.ptr = getelementptr inbounds i32, i32* %p, i64 %idx.ext ret i32* %add.ptr } diff --git a/llvm/test/CodeGen/BPF/32-bit-subreg-zext.ll b/llvm/test/CodeGen/BPF/32-bit-subreg-zext.ll new file mode 100644 index 00000000000000..57ea93a8fe6e68 --- /dev/null +++ b/llvm/test/CodeGen/BPF/32-bit-subreg-zext.ll @@ -0,0 +1,21 @@ +; RUN: llc -O2 -march=bpfel -mattr=+alu32 < %s | FileCheck %s +; RUN: llc -O2 -march=bpfel -mcpu=v3 < %s | FileCheck %s +; RUN: llc -O2 -march=bpfeb -mattr=+alu32 < %s | FileCheck %s +; RUN: llc -O2 -march=bpfeb -mcpu=v3 < %s | FileCheck %s +; +; long 
zext(unsigned int a) +; { +; long b = a; +; return b; +; } + +; Function Attrs: norecurse nounwind +define dso_local i64 @zext(i32 %a) local_unnamed_addr #0 { +entry: + %conv = zext i32 %a to i64 + ; CHECK-NOT: r[[#]] <<= 32 + ; CHECK-NOT: r[[#]] >>= 32 + ret i64 %conv +} + +attributes #0 = { norecurse nounwind } diff --git a/llvm/test/CodeGen/PowerPC/check-cpu.ll b/llvm/test/CodeGen/PowerPC/check-cpu.ll index baa39024ebe8d9..132be3058216b7 100644 --- a/llvm/test/CodeGen/PowerPC/check-cpu.ll +++ b/llvm/test/CodeGen/PowerPC/check-cpu.ll @@ -2,9 +2,13 @@ ; RUN: -mcpu=future < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -mcpu=future < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=power10 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 < %s | FileCheck %s -; Test mcpu=future that should be recognized on PowerPC. +; Test -mcpu=[pwr10|future] is recognized on PowerPC. 
; CHECK-NOT: is not a recognized processor for this target ; CHECK: .text diff --git a/llvm/test/CodeGen/X86/O3-pipeline.ll b/llvm/test/CodeGen/X86/O3-pipeline.ll index 9087c69fac96ca..c91b8143e09c77 100644 --- a/llvm/test/CodeGen/X86/O3-pipeline.ll +++ b/llvm/test/CodeGen/X86/O3-pipeline.ll @@ -27,6 +27,7 @@ ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Loop Pass Manager +; CHECK-NEXT: Canonicalize Freeze Instructions in Loops ; CHECK-NEXT: Induction Variable Users ; CHECK-NEXT: Loop Strength Reduction ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unordered.ll index 9843bf81e90538..b321820cf506ae 100644 --- a/llvm/test/CodeGen/X86/atomic-unordered.ll +++ b/llvm/test/CodeGen/X86/atomic-unordered.ll @@ -837,18 +837,16 @@ define i64 @load_fold_udiv1(i64* %p) { ; ; CHECK-O3-CUR-LABEL: load_fold_udiv1: ; CHECK-O3-CUR: # %bb.0: -; CHECK-O3-CUR-NEXT: movq (%rdi), %rax -; CHECK-O3-CUR-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 -; CHECK-O3-CUR-NEXT: mulq %rcx -; CHECK-O3-CUR-NEXT: movq %rdx, %rax +; CHECK-O3-CUR-NEXT: movq (%rdi), %rdx +; CHECK-O3-CUR-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 +; CHECK-O3-CUR-NEXT: mulxq %rax, %rcx, %rax ; CHECK-O3-CUR-NEXT: shrq $3, %rax ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O3-EX-LABEL: load_fold_udiv1: ; CHECK-O3-EX: # %bb.0: -; CHECK-O3-EX-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 -; CHECK-O3-EX-NEXT: mulq (%rdi) -; CHECK-O3-EX-NEXT: movq %rdx, %rax +; CHECK-O3-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 +; CHECK-O3-EX-NEXT: mulxq (%rdi), %rcx, %rax ; CHECK-O3-EX-NEXT: shrq $3, %rax ; CHECK-O3-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 @@ -1033,15 +1031,14 @@ define i64 @load_fold_urem1(i64* %p) { ; ; CHECK-O3-LABEL: load_fold_urem1: ; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: 
movq (%rdi), %rcx -; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 -; CHECK-O3-NEXT: movq %rcx, %rax -; CHECK-O3-NEXT: mulq %rdx +; CHECK-O3-NEXT: movq (%rdi), %rax +; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 +; CHECK-O3-NEXT: movq %rax, %rdx +; CHECK-O3-NEXT: mulxq %rcx, %rcx, %rdx ; CHECK-O3-NEXT: shrq $3, %rdx -; CHECK-O3-NEXT: leaq (%rdx,%rdx,4), %rax -; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax -; CHECK-O3-NEXT: subq %rax, %rcx -; CHECK-O3-NEXT: movq %rcx, %rax +; CHECK-O3-NEXT: leaq (%rdx,%rdx,4), %rcx +; CHECK-O3-NEXT: leaq (%rcx,%rcx,2), %rcx +; CHECK-O3-NEXT: subq %rcx, %rax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = urem i64 %v, 15 @@ -1694,28 +1691,28 @@ define void @rmw_fold_sdiv2(i64* %p, i64 %v) { define void @rmw_fold_udiv1(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_udiv1: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movq (%rdi), %rax -; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 -; CHECK-O0-NEXT: mulq %rcx -; CHECK-O0-NEXT: shrq $3, %rdx -; CHECK-O0-NEXT: movq %rdx, (%rdi) +; CHECK-O0-NEXT: movq (%rdi), %rdx +; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 +; CHECK-O0-NEXT: mulxq %rax, %rcx, %rax +; CHECK-O0-NEXT: shrq $3, %rax +; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: rmw_fold_udiv1: ; CHECK-O3-CUR: # %bb.0: -; CHECK-O3-CUR-NEXT: movq (%rdi), %rax -; CHECK-O3-CUR-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 -; CHECK-O3-CUR-NEXT: mulq %rcx -; CHECK-O3-CUR-NEXT: shrq $3, %rdx -; CHECK-O3-CUR-NEXT: movq %rdx, (%rdi) +; CHECK-O3-CUR-NEXT: movq (%rdi), %rdx +; CHECK-O3-CUR-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 +; CHECK-O3-CUR-NEXT: mulxq %rax, %rax, %rcx +; CHECK-O3-CUR-NEXT: shrq $3, %rcx +; CHECK-O3-CUR-NEXT: movq %rcx, (%rdi) ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O3-EX-LABEL: rmw_fold_udiv1: ; 
CHECK-O3-EX: # %bb.0: -; CHECK-O3-EX-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 -; CHECK-O3-EX-NEXT: mulq (%rdi) -; CHECK-O3-EX-NEXT: shrq $3, %rdx -; CHECK-O3-EX-NEXT: movq %rdx, (%rdi) +; CHECK-O3-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 +; CHECK-O3-EX-NEXT: mulxq (%rdi), %rax, %rcx +; CHECK-O3-EX-NEXT: shrq $3, %rcx +; CHECK-O3-EX-NEXT: movq %rcx, (%rdi) ; CHECK-O3-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = udiv i64 %prev, 15 @@ -1842,27 +1839,25 @@ define void @rmw_fold_urem1(i64* %p, i64 %v) { ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 -; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-O0-NEXT: mulq %rcx -; CHECK-O0-NEXT: shrq $3, %rdx -; CHECK-O0-NEXT: leaq (%rdx,%rdx,4), %rax -; CHECK-O0-NEXT: leaq (%rax,%rax,2), %rax -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; CHECK-O0-NEXT: subq %rax, %rcx -; CHECK-O0-NEXT: movq %rcx, (%rdi) +; CHECK-O0-NEXT: movq %rax, %rdx +; CHECK-O0-NEXT: mulxq %rcx, %rdx, %rcx +; CHECK-O0-NEXT: shrq $3, %rcx +; CHECK-O0-NEXT: leaq (%rcx,%rcx,4), %rcx +; CHECK-O0-NEXT: leaq (%rcx,%rcx,2), %rcx +; CHECK-O0-NEXT: subq %rcx, %rax +; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: rmw_fold_urem1: ; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movq (%rdi), %rcx -; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 -; CHECK-O3-NEXT: movq %rcx, %rax -; CHECK-O3-NEXT: mulq %rdx -; CHECK-O3-NEXT: shrq $3, %rdx -; CHECK-O3-NEXT: leaq (%rdx,%rdx,4), %rax +; CHECK-O3-NEXT: movq (%rdi), %rdx +; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 +; CHECK-O3-NEXT: mulxq %rax, %rax, %rcx +; CHECK-O3-NEXT: shrq $3, %rcx +; CHECK-O3-NEXT: leaq (%rcx,%rcx,4), %rax ; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax -; CHECK-O3-NEXT: subq %rax, %rcx -; 
CHECK-O3-NEXT: movq %rcx, (%rdi) +; CHECK-O3-NEXT: subq %rax, %rdx +; CHECK-O3-NEXT: movq %rdx, (%rdi) ; CHECK-O3-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = urem i64 %prev, 15 diff --git a/llvm/test/CodeGen/X86/bmi2-x86_64.ll b/llvm/test/CodeGen/X86/bmi2-x86_64.ll index 6333732ae0f263..bb03138ccf7634 100644 --- a/llvm/test/CodeGen/X86/bmi2-x86_64.ll +++ b/llvm/test/CodeGen/X86/bmi2-x86_64.ll @@ -68,8 +68,8 @@ define i64 @mulx64(i64 %x, i64 %y, i64* %p) { ; CHECK-LABEL: mulx64: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rdx, %rcx -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: mulq %rsi +; CHECK-NEXT: movq %rdi, %rdx +; CHECK-NEXT: mulxq %rsi, %rax, %rdx ; CHECK-NEXT: movq %rdx, (%rcx) ; CHECK-NEXT: retq %x1 = zext i64 %x to i128 @@ -86,8 +86,8 @@ define i64 @mulx64_load(i64 %x, i64* %y, i64* %p) { ; CHECK-LABEL: mulx64_load: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rdx, %rcx -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: mulq (%rsi) +; CHECK-NEXT: movq %rdi, %rdx +; CHECK-NEXT: mulxq (%rsi), %rax, %rdx ; CHECK-NEXT: movq %rdx, (%rcx) ; CHECK-NEXT: retq %y1 = load i64, i64* %y diff --git a/llvm/test/CodeGen/X86/bmi2.ll b/llvm/test/CodeGen/X86/bmi2.ll index 114f9ac5479af1..bf78cb4f72efb5 100644 --- a/llvm/test/CodeGen/X86/bmi2.ll +++ b/llvm/test/CodeGen/X86/bmi2.ll @@ -120,11 +120,11 @@ define i32 @mulx32(i32 %x, i32 %y, i32* %p) { ; X86-LABEL: mulx32: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: addl %eax, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: addl %edx, %edx -; X86-NEXT: mull %edx +; X86-NEXT: addl %eax, %eax +; X86-NEXT: mulxl %eax, %eax, %edx ; X86-NEXT: movl %edx, (%ecx) ; X86-NEXT: retl ; @@ -156,10 +156,10 @@ define i32 @mulx32_load(i32 %x, i32* %y, i32* %p) { ; X86-LABEL: mulx32_load: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 
-; X86-NEXT: addl %eax, %eax -; X86-NEXT: mull (%edx) +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: addl %edx, %edx +; X86-NEXT: mulxl (%eax), %eax, %edx ; X86-NEXT: movl %edx, (%ecx) ; X86-NEXT: retl ; diff --git a/llvm/test/CodeGen/X86/hoist-invariant-load.ll b/llvm/test/CodeGen/X86/hoist-invariant-load.ll index 13b72bdfc6dc7c..73cf898223bc61 100644 --- a/llvm/test/CodeGen/X86/hoist-invariant-load.ll +++ b/llvm/test/CodeGen/X86/hoist-invariant-load.ll @@ -215,22 +215,21 @@ declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind define void @test_multi_def(i64* dereferenceable(8) %x1, ; CHECK-LABEL: test_multi_def: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: movq %rdx, %r8 -; CHECK-NEXT: xorl %r9d, %r9d -; CHECK-NEXT: movq (%rdi), %rdi -; CHECK-NEXT: movq (%rsi), %rsi +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: xorl %r8d, %r8d +; CHECK-NEXT: movq (%rdi), %rdx +; CHECK-NEXT: movq (%rsi), %r9 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB4_2: ## %for.body ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: mulq %rsi -; CHECK-NEXT: addq %rax, (%r8) -; CHECK-NEXT: adcq %rdx, 8(%r8) +; CHECK-NEXT: mulxq %r9, %rsi, %rdi +; CHECK-NEXT: addq %rsi, (%rax) +; CHECK-NEXT: adcq %rdi, 8(%rax) ; CHECK-NEXT: ## %bb.1: ## %for.check ; CHECK-NEXT: ## in Loop: Header=BB4_2 Depth=1 -; CHECK-NEXT: incq %r9 -; CHECK-NEXT: addq $16, %r8 -; CHECK-NEXT: cmpq %rcx, %r9 +; CHECK-NEXT: incq %r8 +; CHECK-NEXT: addq $16, %rax +; CHECK-NEXT: cmpq %rcx, %r8 ; CHECK-NEXT: jl LBB4_2 ; CHECK-NEXT: ## %bb.3: ## %exit ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/i128-mul.ll b/llvm/test/CodeGen/X86/i128-mul.ll index e40f10a67dd1bf..45834f2eeecd34 100644 --- a/llvm/test/CodeGen/X86/i128-mul.ll +++ b/llvm/test/CodeGen/X86/i128-mul.ll @@ -7,48 +7,86 @@ ; PR1198 define i64 @foo(i64 %x, i64 %y) nounwind { -; X86-LABEL: foo: -; X86: # %bb.0: -; X86-NEXT: pushl %ebp -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi -; 
X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: mull %ebx -; X86-NEXT: movl %edx, %edi -; X86-NEXT: movl %ebp, %eax -; X86-NEXT: mull %ebx -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: addl %edi, %ebp -; X86-NEXT: adcl $0, %ebx -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: mull %esi -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: addl %ebp, %eax -; X86-NEXT: adcl %ebx, %ecx -; X86-NEXT: setb %al -; X86-NEXT: movzbl %al, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: mull %esi -; X86-NEXT: addl %ecx, %eax -; X86-NEXT: adcl %edi, %edx -; X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx -; X86-NEXT: popl %ebp -; X86-NEXT: retl +; X86-NOBMI-LABEL: foo: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %ebp +; X86-NOBMI-NEXT: pushl %ebx +; X86-NOBMI-NEXT: pushl %edi +; X86-NOBMI-NEXT: pushl %esi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI-NEXT: movl %ecx, %eax +; X86-NOBMI-NEXT: mull %ebx +; X86-NOBMI-NEXT: movl %edx, %edi +; X86-NOBMI-NEXT: movl %ebp, %eax +; X86-NOBMI-NEXT: mull %ebx +; X86-NOBMI-NEXT: movl %edx, %ebx +; X86-NOBMI-NEXT: movl %eax, %ebp +; X86-NOBMI-NEXT: addl %edi, %ebp +; X86-NOBMI-NEXT: adcl $0, %ebx +; X86-NOBMI-NEXT: movl %ecx, %eax +; X86-NOBMI-NEXT: mull %esi +; X86-NOBMI-NEXT: movl %edx, %ecx +; X86-NOBMI-NEXT: addl %ebp, %eax +; X86-NOBMI-NEXT: adcl %ebx, %ecx +; X86-NOBMI-NEXT: setb %al +; X86-NOBMI-NEXT: movzbl %al, %edi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: mull %esi +; X86-NOBMI-NEXT: addl %ecx, %eax +; X86-NOBMI-NEXT: adcl %edi, %edx +; X86-NOBMI-NEXT: popl %esi +; X86-NOBMI-NEXT: popl %edi +; X86-NOBMI-NEXT: popl %ebx +; X86-NOBMI-NEXT: popl %ebp +; 
X86-NOBMI-NEXT: retl ; -; X64-LABEL: foo: -; X64: # %bb.0: -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %rsi -; X64-NEXT: movq %rdx, %rax -; X64-NEXT: retq +; X86-BMI-LABEL: foo: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: pushl %ebp +; X86-BMI-NEXT: pushl %ebx +; X86-BMI-NEXT: pushl %edi +; X86-BMI-NEXT: pushl %esi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI-NEXT: movl %eax, %edx +; X86-BMI-NEXT: mulxl %esi, %edx, %ebx +; X86-BMI-NEXT: movl %ecx, %edx +; X86-BMI-NEXT: mulxl %esi, %esi, %ebp +; X86-BMI-NEXT: addl %ebx, %esi +; X86-BMI-NEXT: adcl $0, %ebp +; X86-BMI-NEXT: movl %eax, %edx +; X86-BMI-NEXT: mulxl %edi, %eax, %ebx +; X86-BMI-NEXT: addl %esi, %eax +; X86-BMI-NEXT: adcl %ebp, %ebx +; X86-BMI-NEXT: setb %al +; X86-BMI-NEXT: movzbl %al, %esi +; X86-BMI-NEXT: movl %ecx, %edx +; X86-BMI-NEXT: mulxl %edi, %eax, %edx +; X86-BMI-NEXT: addl %ebx, %eax +; X86-BMI-NEXT: adcl %esi, %edx +; X86-BMI-NEXT: popl %esi +; X86-BMI-NEXT: popl %edi +; X86-BMI-NEXT: popl %ebx +; X86-BMI-NEXT: popl %ebp +; X86-BMI-NEXT: retl +; +; X64-NOBMI-LABEL: foo: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rdi, %rax +; X64-NOBMI-NEXT: mulq %rsi +; X64-NOBMI-NEXT: movq %rdx, %rax +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: foo: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: movq %rdi, %rdx +; X64-BMI-NEXT: mulxq %rsi, %rcx, %rax +; X64-BMI-NEXT: retq %tmp0 = zext i64 %x to i128 %tmp1 = zext i64 %y to i128 %tmp2 = mul i128 %tmp0, %tmp1 @@ -62,107 +100,202 @@ define i64 @foo(i64 %x, i64 %y) nounwind { ; zero-extended value. 
define i64 @mul1(i64 %n, i64* nocapture %z, i64* nocapture %x, i64 %y) nounwind { -; X86-LABEL: mul1: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi -; X86-NEXT: subl $24, %esp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl %ecx, %eax -; X86-NEXT: je .LBB1_3 -; X86-NEXT: # %bb.1: # %for.body.preheader -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: xorl %ebp, %ebp -; X86-NEXT: movl $0, (%esp) # 4-byte Folded Spill -; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB1_2: # %for.body -; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl (%eax,%ebp,8), %esi -; X86-NEXT: movl 4(%eax,%ebp,8), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %esi, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: mull %edi -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT: adcl $0, %ecx -; X86-NEXT: movl %esi, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: mull %edx -; X86-NEXT: movl %edx, %esi -; X86-NEXT: movl %eax, %edi -; X86-NEXT: addl %ebx, %edi -; X86-NEXT: adcl %ecx, %esi -; X86-NEXT: setb %bl -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: addl %esi, %eax -; X86-NEXT: movzbl %bl, %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: adcl %esi, %edx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 
4-byte Folded Reload -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X86-NEXT: adcl $0, %eax -; X86-NEXT: adcl $0, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %ecx, (%esi,%ebp,8) -; X86-NEXT: movl %edi, 4(%esi,%ebp,8) -; X86-NEXT: addl $1, %ebp -; X86-NEXT: movl (%esp), %edi # 4-byte Reload -; X86-NEXT: adcl $0, %edi -; X86-NEXT: movl %ebp, %esi -; X86-NEXT: xorl %ebx, %esi -; X86-NEXT: movl %edi, (%esp) # 4-byte Spill -; X86-NEXT: xorl {{[0-9]+}}(%esp), %edi -; X86-NEXT: orl %esi, %edi -; X86-NEXT: jne .LBB1_2 -; X86-NEXT: .LBB1_3: # %for.end -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: addl $24, %esp -; X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx -; X86-NEXT: popl %ebp -; X86-NEXT: retl +; X86-NOBMI-LABEL: mul1: +; X86-NOBMI: # %bb.0: # %entry +; X86-NOBMI-NEXT: pushl %ebp +; X86-NOBMI-NEXT: pushl %ebx +; X86-NOBMI-NEXT: pushl %edi +; X86-NOBMI-NEXT: pushl %esi +; X86-NOBMI-NEXT: subl $24, %esp +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: orl %ecx, %eax +; X86-NOBMI-NEXT: je .LBB1_3 +; X86-NOBMI-NEXT: # %bb.1: # %for.body.preheader +; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: xorl %edx, %edx +; X86-NOBMI-NEXT: xorl %ebp, %ebp +; X86-NOBMI-NEXT: movl $0, (%esp) # 4-byte Folded Spill +; X86-NOBMI-NEXT: .p2align 4, 0x90 +; X86-NOBMI-NEXT: .LBB1_2: # %for.body +; X86-NOBMI-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOBMI-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: movl (%eax,%ebp,8), %esi +; X86-NOBMI-NEXT: movl 4(%eax,%ebp,8), %ecx +; X86-NOBMI-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOBMI-NEXT: mull %edi +; X86-NOBMI-NEXT: movl %edx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOBMI-NEXT: movl %ecx, %eax +; X86-NOBMI-NEXT: mull %edi +; X86-NOBMI-NEXT: movl %edx, %ecx +; X86-NOBMI-NEXT: movl %eax, %ebx +; X86-NOBMI-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NOBMI-NEXT: adcl $0, %ecx +; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI-NEXT: mull %edx +; X86-NOBMI-NEXT: movl %edx, %esi +; X86-NOBMI-NEXT: movl %eax, %edi +; X86-NOBMI-NEXT: addl %ebx, %edi +; X86-NOBMI-NEXT: adcl %ecx, %esi +; X86-NOBMI-NEXT: setb %bl +; X86-NOBMI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOBMI-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: addl %esi, %eax +; X86-NOBMI-NEXT: movzbl %bl, %esi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NOBMI-NEXT: adcl %esi, %edx +; X86-NOBMI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NOBMI-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NOBMI-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NOBMI-NEXT: adcl $0, %eax +; X86-NOBMI-NEXT: adcl $0, %edx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI-NEXT: movl %ecx, (%esi,%ebp,8) +; X86-NOBMI-NEXT: movl %edi, 4(%esi,%ebp,8) +; X86-NOBMI-NEXT: addl $1, %ebp +; X86-NOBMI-NEXT: movl (%esp), %edi # 4-byte Reload +; X86-NOBMI-NEXT: adcl $0, %edi +; X86-NOBMI-NEXT: movl %ebp, %esi +; X86-NOBMI-NEXT: xorl %ebx, %esi +; X86-NOBMI-NEXT: movl %edi, (%esp) # 4-byte Spill +; X86-NOBMI-NEXT: xorl {{[0-9]+}}(%esp), %edi +; X86-NOBMI-NEXT: orl %esi, %edi +; X86-NOBMI-NEXT: jne .LBB1_2 +; X86-NOBMI-NEXT: .LBB1_3: # %for.end +; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: xorl %edx, %edx +; X86-NOBMI-NEXT: addl $24, %esp +; X86-NOBMI-NEXT: popl %esi +; X86-NOBMI-NEXT: popl %edi +; X86-NOBMI-NEXT: popl %ebx +; X86-NOBMI-NEXT: popl %ebp +; X86-NOBMI-NEXT: retl +; +; X86-BMI-LABEL: mul1: +; 
X86-BMI: # %bb.0: # %entry +; X86-BMI-NEXT: pushl %ebp +; X86-BMI-NEXT: pushl %ebx +; X86-BMI-NEXT: pushl %edi +; X86-BMI-NEXT: pushl %esi +; X86-BMI-NEXT: subl $16, %esp +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: orl %ecx, %eax +; X86-BMI-NEXT: je .LBB1_3 +; X86-BMI-NEXT: # %bb.1: # %for.body.preheader +; X86-BMI-NEXT: xorl %ecx, %ecx +; X86-BMI-NEXT: xorl %edx, %edx +; X86-BMI-NEXT: xorl %ebx, %ebx +; X86-BMI-NEXT: xorl %ebp, %ebp +; X86-BMI-NEXT: .p2align 4, 0x90 +; X86-BMI-NEXT: .LBB1_2: # %for.body +; X86-BMI-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-BMI-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movl (%ecx,%ebx,8), %eax +; X86-BMI-NEXT: movl 4(%ecx,%ebx,8), %esi +; X86-BMI-NEXT: movl %esi, (%esp) # 4-byte Spill +; X86-BMI-NEXT: movl %eax, %edx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: mulxl %ecx, %edx, %edi +; X86-BMI-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-BMI-NEXT: movl %esi, %edx +; X86-BMI-NEXT: mulxl %ecx, %esi, %ecx +; X86-BMI-NEXT: addl %edi, %esi +; X86-BMI-NEXT: adcl $0, %ecx +; X86-BMI-NEXT: movl %eax, %edx +; X86-BMI-NEXT: mulxl {{[0-9]+}}(%esp), %edi, %eax +; X86-BMI-NEXT: addl %esi, %edi +; X86-BMI-NEXT: adcl %ecx, %eax +; X86-BMI-NEXT: movl (%esp), %edx # 4-byte Reload +; X86-BMI-NEXT: mulxl {{[0-9]+}}(%esp), %ecx, %edx +; X86-BMI-NEXT: setb (%esp) # 1-byte Folded Spill +; X86-BMI-NEXT: addl %eax, %ecx +; X86-BMI-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload +; X86-BMI-NEXT: adcl %eax, %edx +; X86-BMI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-BMI-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-BMI-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-BMI-NEXT: adcl $0, %ecx +; X86-BMI-NEXT: adcl $0, %edx +; X86-BMI-NEXT: 
movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: movl %esi, (%eax,%ebx,8) +; X86-BMI-NEXT: movl %edi, 4(%eax,%ebx,8) +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-BMI-NEXT: addl $1, %ebx +; X86-BMI-NEXT: adcl $0, %ebp +; X86-BMI-NEXT: movl %ebx, %eax +; X86-BMI-NEXT: xorl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: movl %ebp, %esi +; X86-BMI-NEXT: xorl %edi, %esi +; X86-BMI-NEXT: orl %eax, %esi +; X86-BMI-NEXT: jne .LBB1_2 +; X86-BMI-NEXT: .LBB1_3: # %for.end +; X86-BMI-NEXT: xorl %eax, %eax +; X86-BMI-NEXT: xorl %edx, %edx +; X86-BMI-NEXT: addl $16, %esp +; X86-BMI-NEXT: popl %esi +; X86-BMI-NEXT: popl %edi +; X86-BMI-NEXT: popl %ebx +; X86-BMI-NEXT: popl %ebp +; X86-BMI-NEXT: retl +; +; X64-NOBMI-LABEL: mul1: +; X64-NOBMI: # %bb.0: # %entry +; X64-NOBMI-NEXT: testq %rdi, %rdi +; X64-NOBMI-NEXT: je .LBB1_3 +; X64-NOBMI-NEXT: # %bb.1: # %for.body.preheader +; X64-NOBMI-NEXT: movq %rcx, %r8 +; X64-NOBMI-NEXT: movq %rdx, %r9 +; X64-NOBMI-NEXT: xorl %r10d, %r10d +; X64-NOBMI-NEXT: xorl %ecx, %ecx +; X64-NOBMI-NEXT: .p2align 4, 0x90 +; X64-NOBMI-NEXT: .LBB1_2: # %for.body +; X64-NOBMI-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NOBMI-NEXT: movq %r8, %rax +; X64-NOBMI-NEXT: mulq (%r9,%rcx,8) +; X64-NOBMI-NEXT: addq %r10, %rax +; X64-NOBMI-NEXT: adcq $0, %rdx +; X64-NOBMI-NEXT: movq %rax, (%rsi,%rcx,8) +; X64-NOBMI-NEXT: incq %rcx +; X64-NOBMI-NEXT: cmpq %rcx, %rdi +; X64-NOBMI-NEXT: movq %rdx, %r10 +; X64-NOBMI-NEXT: jne .LBB1_2 +; X64-NOBMI-NEXT: .LBB1_3: # %for.end +; X64-NOBMI-NEXT: xorl %eax, %eax +; X64-NOBMI-NEXT: retq ; -; X64-LABEL: mul1: -; X64: # %bb.0: # %entry -; X64-NEXT: testq %rdi, %rdi -; X64-NEXT: je .LBB1_3 -; X64-NEXT: # %bb.1: # %for.body.preheader -; X64-NEXT: movq %rcx, %r8 -; X64-NEXT: movq %rdx, %r9 -; X64-NEXT: xorl %r10d, %r10d -; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB1_2: # %for.body -; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movq %r8, %rax -; X64-NEXT: mulq (%r9,%rcx,8) -; X64-NEXT: 
addq %r10, %rax -; X64-NEXT: adcq $0, %rdx -; X64-NEXT: movq %rax, (%rsi,%rcx,8) -; X64-NEXT: incq %rcx -; X64-NEXT: cmpq %rcx, %rdi -; X64-NEXT: movq %rdx, %r10 -; X64-NEXT: jne .LBB1_2 -; X64-NEXT: .LBB1_3: # %for.end -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: retq +; X64-BMI-LABEL: mul1: +; X64-BMI: # %bb.0: # %entry +; X64-BMI-NEXT: testq %rdi, %rdi +; X64-BMI-NEXT: je .LBB1_3 +; X64-BMI-NEXT: # %bb.1: # %for.body.preheader +; X64-BMI-NEXT: movq %rcx, %r8 +; X64-BMI-NEXT: movq %rdx, %r9 +; X64-BMI-NEXT: xorl %r10d, %r10d +; X64-BMI-NEXT: xorl %ecx, %ecx +; X64-BMI-NEXT: .p2align 4, 0x90 +; X64-BMI-NEXT: .LBB1_2: # %for.body +; X64-BMI-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-BMI-NEXT: movq %r8, %rdx +; X64-BMI-NEXT: mulxq (%r9,%rcx,8), %rax, %rdx +; X64-BMI-NEXT: addq %r10, %rax +; X64-BMI-NEXT: adcq $0, %rdx +; X64-BMI-NEXT: movq %rax, (%rsi,%rcx,8) +; X64-BMI-NEXT: incq %rcx +; X64-BMI-NEXT: cmpq %rcx, %rdi +; X64-BMI-NEXT: movq %rdx, %r10 +; X64-BMI-NEXT: jne .LBB1_2 +; X64-BMI-NEXT: .LBB1_3: # %for.end +; X64-BMI-NEXT: xorl %eax, %eax +; X64-BMI-NEXT: retq entry: %conv = zext i64 %y to i128 %cmp11 = icmp eq i64 %n, 0 diff --git a/llvm/test/CodeGen/X86/mulx32.ll b/llvm/test/CodeGen/X86/mulx32.ll index faf299f3a2dfaa..872e72d503aa36 100644 --- a/llvm/test/CodeGen/X86/mulx32.ll +++ b/llvm/test/CodeGen/X86/mulx32.ll @@ -5,8 +5,8 @@ define i64 @f1(i32 %a, i32 %b) { ; CHECK-LABEL: f1: ; CHECK: # %bb.0: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: mull {{[0-9]+}}(%esp) +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: mulxl {{[0-9]+}}(%esp), %eax, %edx ; CHECK-NEXT: retl %x = zext i32 %a to i64 %y = zext i32 %b to i64 @@ -17,9 +17,9 @@ define i64 @f1(i32 %a, i32 %b) { define i64 @f2(i32 %a, i32* %p) { ; CHECK-LABEL: f2: ; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: mull (%ecx) +; CHECK-NEXT: mulxl (%eax), %eax, %edx ; 
CHECK-NEXT: retl %b = load i32, i32* %p %x = zext i32 %a to i64 diff --git a/llvm/test/CodeGen/X86/mulx64.ll b/llvm/test/CodeGen/X86/mulx64.ll index 38f1d3ea5ab32f..e038f33000937f 100644 --- a/llvm/test/CodeGen/X86/mulx64.ll +++ b/llvm/test/CodeGen/X86/mulx64.ll @@ -5,8 +5,8 @@ define i128 @f1(i64 %a, i64 %b) { ; CHECK-LABEL: f1: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: mulq %rsi +; CHECK-NEXT: movq %rdi, %rdx +; CHECK-NEXT: mulxq %rsi, %rax, %rdx ; CHECK-NEXT: retq %x = zext i64 %a to i128 %y = zext i64 %b to i128 @@ -17,8 +17,8 @@ define i128 @f1(i64 %a, i64 %b) { define i128 @f2(i64 %a, i64* %p) { ; CHECK-LABEL: f2: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: mulq (%rsi) +; CHECK-NEXT: movq %rdi, %rdx +; CHECK-NEXT: mulxq (%rsi), %rax, %rdx ; CHECK-NEXT: retq %b = load i64, i64* %p %x = zext i64 %a to i128 diff --git a/llvm/test/CodeGen/X86/pr35636.ll b/llvm/test/CodeGen/X86/pr35636.ll index a97af6a1ac67fc..07fb37f4b62a8e 100644 --- a/llvm/test/CodeGen/X86/pr35636.ll +++ b/llvm/test/CodeGen/X86/pr35636.ll @@ -5,11 +5,11 @@ define void @_Z15uint64_to_asciimPc(i64 %arg) { ; HSW-LABEL: _Z15uint64_to_asciimPc: ; HSW: # %bb.0: # %bb -; HSW-NEXT: movq %rdi, %rax -; HSW-NEXT: movabsq $811296384146066817, %rcx # imm = 0xB424DC35095CD81 -; HSW-NEXT: mulq %rcx -; HSW-NEXT: shrq $42, %rdx -; HSW-NEXT: imulq $281474977, %rdx, %rax # imm = 0x10C6F7A1 +; HSW-NEXT: movabsq $811296384146066817, %rax # imm = 0xB424DC35095CD81 +; HSW-NEXT: movq %rdi, %rdx +; HSW-NEXT: mulxq %rax, %rax, %rcx +; HSW-NEXT: shrq $42, %rcx +; HSW-NEXT: imulq $281474977, %rcx, %rax # imm = 0x10C6F7A1 ; HSW-NEXT: shrq $20, %rax ; HSW-NEXT: leal (%rax,%rax,4), %eax ; HSW-NEXT: addl $5, %eax @@ -22,11 +22,11 @@ define void @_Z15uint64_to_asciimPc(i64 %arg) { ; ; ZN-LABEL: _Z15uint64_to_asciimPc: ; ZN: # %bb.0: # %bb -; ZN-NEXT: movq %rdi, %rax -; ZN-NEXT: movabsq $811296384146066817, %rcx # imm = 0xB424DC35095CD81 -; ZN-NEXT: mulq %rcx -; ZN-NEXT: shrq $42, %rdx 
-; ZN-NEXT: imulq $281474977, %rdx, %rax # imm = 0x10C6F7A1 +; ZN-NEXT: movabsq $811296384146066817, %rax # imm = 0xB424DC35095CD81 +; ZN-NEXT: movq %rdi, %rdx +; ZN-NEXT: mulxq %rax, %rax, %rcx +; ZN-NEXT: shrq $42, %rcx +; ZN-NEXT: imulq $281474977, %rcx, %rax # imm = 0x10C6F7A1 ; ZN-NEXT: shrq $20, %rax ; ZN-NEXT: leal 5(%rax,%rax,4), %eax ; ZN-NEXT: andl $134217727, %eax # imm = 0x7FFFFFF diff --git a/llvm/test/CodeGen/X86/vec_saddo.ll b/llvm/test/CodeGen/X86/vec_saddo.ll index 5223187eee7db5..dd3a733ab2178d 100644 --- a/llvm/test/CodeGen/X86/vec_saddo.ll +++ b/llvm/test/CodeGen/X86/vec_saddo.ll @@ -1078,12 +1078,13 @@ define <4 x i32> @saddo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind ; SSE-NEXT: paddd %xmm1, %xmm0 ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: pslld $31, %xmm1 -; SSE-NEXT: psrad $31, %xmm1 -; SSE-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE-NEXT: pxor %xmm2, %xmm0 ; SSE-NEXT: movmskps %xmm1, %eax +; SSE-NEXT: psrad $31, %xmm1 +; SSE-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE-NEXT: pxor %xmm0, %xmm1 ; SSE-NEXT: movb %al, (%rdi) +; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX1-LABEL: saddo_v4i1: @@ -1094,8 +1095,8 @@ define <4 x i32> @saddo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind ; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0 ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpslld $31, %xmm0, %xmm1 -; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1 -; AVX1-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2 +; AVX1-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vmovmskps %xmm1, %eax @@ -1110,8 +1111,8 @@ define <4 x i32> @saddo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind ; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0 ; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpslld $31, %xmm0, %xmm1 -; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpeqd %xmm0, %xmm1, 
%xmm0 +; AVX2-NEXT: vpsrad $31, %xmm1, %xmm2 +; AVX2-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0 ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vmovmskps %xmm1, %eax diff --git a/llvm/test/CodeGen/X86/vec_smulo.ll b/llvm/test/CodeGen/X86/vec_smulo.ll index a3e28ae8d7baa1..5fde07d1269df8 100644 --- a/llvm/test/CodeGen/X86/vec_smulo.ll +++ b/llvm/test/CodeGen/X86/vec_smulo.ll @@ -3422,21 +3422,20 @@ define <4 x i32> @smulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] ; SSE2-NEXT: psubd %xmm2, %xmm4 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] -; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrad $31, %xmm1 -; SSE2-NEXT: pcmpeqd %xmm4, %xmm1 -; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE2-NEXT: pxor %xmm2, %xmm1 -; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: pslld $31, %xmm3 -; SSE2-NEXT: psrad $31, %xmm3 -; SSE2-NEXT: pcmpeqd %xmm3, %xmm0 -; SSE2-NEXT: pxor %xmm2, %xmm0 -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: movmskps %xmm3, %eax +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] +; SSE2-NEXT: movdqa %xmm2, %xmm0 +; SSE2-NEXT: pslld $31, %xmm0 +; SSE2-NEXT: movmskps %xmm0, %eax +; SSE2-NEXT: psrad $31, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 +; SSE2-NEXT: psrad $31, %xmm2 +; SSE2-NEXT: pcmpeqd %xmm4, %xmm2 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: pxor %xmm1, %xmm2 +; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: por %xmm2, %xmm0 ; SSE2-NEXT: movb %al, (%rdi) ; SSE2-NEXT: retq ; @@ -3461,21 +3460,20 @@ define <4 x i32> @smulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] ; SSSE3-NEXT: 
punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] ; SSSE3-NEXT: psubd %xmm2, %xmm4 -; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] -; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSSE3-NEXT: movdqa %xmm0, %xmm1 -; SSSE3-NEXT: psrad $31, %xmm1 -; SSSE3-NEXT: pcmpeqd %xmm4, %xmm1 -; SSSE3-NEXT: pcmpeqd %xmm2, %xmm2 -; SSSE3-NEXT: pxor %xmm2, %xmm1 -; SSSE3-NEXT: movdqa %xmm0, %xmm3 -; SSSE3-NEXT: pslld $31, %xmm3 -; SSSE3-NEXT: psrad $31, %xmm3 -; SSSE3-NEXT: pcmpeqd %xmm3, %xmm0 -; SSSE3-NEXT: pxor %xmm2, %xmm0 -; SSSE3-NEXT: por %xmm1, %xmm0 -; SSSE3-NEXT: movmskps %xmm3, %eax +; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3] +; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] +; SSSE3-NEXT: movdqa %xmm2, %xmm0 +; SSSE3-NEXT: pslld $31, %xmm0 +; SSSE3-NEXT: movmskps %xmm0, %eax +; SSSE3-NEXT: psrad $31, %xmm0 +; SSSE3-NEXT: pcmpeqd %xmm2, %xmm0 +; SSSE3-NEXT: psrad $31, %xmm2 +; SSSE3-NEXT: pcmpeqd %xmm4, %xmm2 +; SSSE3-NEXT: pcmpeqd %xmm1, %xmm1 +; SSSE3-NEXT: pxor %xmm1, %xmm2 +; SSSE3-NEXT: pxor %xmm1, %xmm0 +; SSSE3-NEXT: por %xmm2, %xmm0 ; SSSE3-NEXT: movb %al, (%rdi) ; SSSE3-NEXT: retq ; @@ -3494,17 +3492,16 @@ define <4 x i32> @smulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7] ; SSE41-NEXT: pmulld %xmm0, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm3 +; SSE41-NEXT: pslld $31, %xmm3 +; SSE41-NEXT: movmskps %xmm3, %eax ; SSE41-NEXT: psrad $31, %xmm3 -; SSE41-NEXT: pcmpeqd %xmm2, %xmm3 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm0, %xmm3 -; SSE41-NEXT: movdqa %xmm1, %xmm2 -; SSE41-NEXT: pslld $31, %xmm2 -; SSE41-NEXT: psrad $31, %xmm2 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm3 +; SSE41-NEXT: psrad $31, %xmm1 ; SSE41-NEXT: pcmpeqd %xmm2, %xmm1 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: por %xmm3, %xmm0 
-; SSE41-NEXT: movmskps %xmm2, %eax +; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE41-NEXT: pxor %xmm0, %xmm1 +; SSE41-NEXT: pxor %xmm3, %xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: movb %al, (%rdi) ; SSE41-NEXT: retq ; @@ -3526,8 +3523,8 @@ define <4 x i32> @smulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpslld $31, %xmm0, %xmm3 -; AVX1-NEXT: vpsrad $31, %xmm3, %xmm3 -; AVX1-NEXT: vpcmpeqd %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vpsrad $31, %xmm3, %xmm4 +; AVX1-NEXT: vpcmpeqd %xmm0, %xmm4, %xmm0 ; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovmskps %xmm3, %eax @@ -3552,8 +3549,8 @@ define <4 x i32> @smulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpslld $31, %xmm0, %xmm3 -; AVX2-NEXT: vpsrad $31, %xmm3, %xmm3 -; AVX2-NEXT: vpcmpeqd %xmm0, %xmm3, %xmm0 +; AVX2-NEXT: vpsrad $31, %xmm3, %xmm4 +; AVX2-NEXT: vpcmpeqd %xmm0, %xmm4, %xmm0 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovmskps %xmm3, %eax diff --git a/llvm/test/CodeGen/X86/vec_ssubo.ll b/llvm/test/CodeGen/X86/vec_ssubo.ll index 21a5e71036d783..8ab9367c32f861 100644 --- a/llvm/test/CodeGen/X86/vec_ssubo.ll +++ b/llvm/test/CodeGen/X86/vec_ssubo.ll @@ -1088,12 +1088,13 @@ define <4 x i32> @ssubo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind ; SSE-NEXT: psubd %xmm1, %xmm0 ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: pslld $31, %xmm1 -; SSE-NEXT: psrad $31, %xmm1 -; SSE-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE-NEXT: pxor %xmm2, %xmm0 ; SSE-NEXT: movmskps %xmm1, %eax +; SSE-NEXT: psrad $31, %xmm1 +; SSE-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE-NEXT: pxor %xmm0, %xmm1 ; SSE-NEXT: movb %al, (%rdi) +; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq ; ; 
AVX1-LABEL: ssubo_v4i1: @@ -1104,8 +1105,8 @@ define <4 x i32> @ssubo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind ; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0 ; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpslld $31, %xmm0, %xmm1 -; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1 -; AVX1-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2 +; AVX1-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vmovmskps %xmm1, %eax @@ -1120,8 +1121,8 @@ define <4 x i32> @ssubo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind ; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0 ; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpslld $31, %xmm0, %xmm1 -; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpsrad $31, %xmm1, %xmm2 +; AVX2-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0 ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vmovmskps %xmm1, %eax diff --git a/llvm/test/DebugInfo/X86/default-subrange-array.ll b/llvm/test/DebugInfo/X86/default-subrange-array.ll index 1374cd888861d7..fde789a106bbbd 100644 --- a/llvm/test/DebugInfo/X86/default-subrange-array.ll +++ b/llvm/test/DebugInfo/X86/default-subrange-array.ll @@ -24,7 +24,7 @@ source_filename = "test/DebugInfo/X86/default-subrange-array.ll" ; CHECK-NEXT: DW_AT_type ; CHECK: DW_TAG_subrange_type ; CHECK-NEXT: DW_AT_type -; DWARF4-NEXT: DW_AT_lower_bound [DW_FORM_data1] (0x00) +; DWARF4-NEXT: DW_AT_lower_bound [DW_FORM_sdata] (0) ; CHECK-NEXT: DW_AT_count [DW_FORM_data1] (0x2a) ; DWARF5-NOT: DW_AT_lower_bound diff --git a/llvm/test/DebugInfo/X86/nondefault-subrange-array.ll b/llvm/test/DebugInfo/X86/nondefault-subrange-array.ll index 59deb7a7b9da11..7089030b873ac9 100644 --- a/llvm/test/DebugInfo/X86/nondefault-subrange-array.ll +++ b/llvm/test/DebugInfo/X86/nondefault-subrange-array.ll @@ -19,7 +19,7 @@ source_filename = 
"test/DebugInfo/X86/nondefault-subrange-array.ll" ; CHECK: DW_TAG_subrange_type ; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x{{[0-9a-f]*}} => {[[BASE2:0x[0-9a-f]*]]} -; CHECK-NEXT: DW_AT_lower_bound [DW_FORM_data8] (0xfffffffffffffffd) +; CHECK-NEXT: DW_AT_lower_bound [DW_FORM_sdata] (-3) ; CHECK-NEXT: DW_AT_count [DW_FORM_data1] (0x2a) ; CHECK: [[BASE]]: DW_TAG_base_type diff --git a/llvm/test/DebugInfo/cDefaultLower.ll b/llvm/test/DebugInfo/cDefaultLower.ll new file mode 100644 index 00000000000000..7cd37fe845b359 --- /dev/null +++ b/llvm/test/DebugInfo/cDefaultLower.ll @@ -0,0 +1,35 @@ +;; This test checks whether c default lowerBound is removed. +; REQUIRES: x86_64-linux + +; RUN: %llc_dwarf %s -filetype=obj -o - | llvm-dwarfdump - | FileCheck %s + +;; c default DW_AT_lower_bound(0) is not dumped. +; CHECK-LABEL: DW_TAG_subrange_type +; CHECK-NEXT: DW_AT_type +; CHECK-NEXT: DW_AT_upper_bound (4) + +;; c non-default lowerBound=1 is dumped. +; CHECK-LABEL: DW_TAG_subrange_type +; CHECK-NEXT: DW_AT_type +; CHECK-NEXT: DW_AT_lower_bound (1) +; CHECK-NEXT: DW_AT_upper_bound (5) + +; ModuleID = 'cDefaultLower.c' +source_filename = "cDefaultLower.c" + +!llvm.module.flags = !{!0, !1} +!llvm.dbg.cu = !{!2} + +!0 = !{i32 2, !"Dwarf Version", i32 4} +!1 = !{i32 2, !"Debug Info Version", i32 3} +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 11.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !4, imports: !4) +!3 = !DIFile(filename: "cDefaultLower.c", directory: "dir") +!4 = !{} +!5 = !{!6, !10} +!6 = !DICompositeType(tag: DW_TAG_array_type, baseType: !7, size: 160, align: 32, elements: !8) +!7 = !DIBasicType(name: "integer", size: 32, align: 32, encoding: DW_ATE_signed) +!8 = !{!9} +!9 = !DISubrange(lowerBound: 0, upperBound: 4) +!10 = !DICompositeType(tag: DW_TAG_array_type, baseType: !7, size: 160, align: 32, elements: !11) +!11 = !{!12} +!12 = !DISubrange(lowerBound: 
1, upperBound: 5) diff --git a/llvm/test/DebugInfo/fortranDefaultLower.ll b/llvm/test/DebugInfo/fortranDefaultLower.ll new file mode 100644 index 00000000000000..face5d12fc4fce --- /dev/null +++ b/llvm/test/DebugInfo/fortranDefaultLower.ll @@ -0,0 +1,35 @@ +;; This test checks whether fortran default lowerBound is removed. +; REQUIRES: x86_64-linux + +; RUN: %llc_dwarf %s -filetype=obj -o - | llvm-dwarfdump - | FileCheck %s + +;; fortran default DW_AT_lower_bound(1) is not dumped. +; CHECK-LABEL: DW_TAG_subrange_type +; CHECK-NEXT: DW_AT_type +; CHECK-NEXT: DW_AT_upper_bound (5) + +;; fortran non-default lowerBound=2 is dumped. +; CHECK-LABEL: DW_TAG_subrange_type +; CHECK-NEXT: DW_AT_type +; CHECK-NEXT: DW_AT_lower_bound (2) +; CHECK-NEXT: DW_AT_upper_bound (6) + +; ModuleID = 'fortranDefaultLower.ll' +source_filename = "fortranDefaultLower.f90" + +!llvm.module.flags = !{!0, !1} +!llvm.dbg.cu = !{!2} + +!0 = !{i32 2, !"Dwarf Version", i32 4} +!1 = !{i32 2, !"Debug Info Version", i32 3} +!2 = distinct !DICompileUnit(language: DW_LANG_Fortran90, file: !3, producer: " F90 Flang - 1.5 2017-05-01", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !4, imports: !4) +!3 = !DIFile(filename: "fortranDefaultLower.f90", directory: "dir") +!4 = !{} +!5 = !{!6, !10} +!6 = !DICompositeType(tag: DW_TAG_array_type, baseType: !7, size: 160, align: 32, elements: !8) +!7 = !DIBasicType(name: "integer", size: 32, align: 32, encoding: DW_ATE_signed) +!8 = !{!9} +!9 = !DISubrange(lowerBound: 1, upperBound: 5) +!10 = !DICompositeType(tag: DW_TAG_array_type, baseType: !7, size: 160, align: 32, elements: !11) +!11 = !{!12} +!12 = !DISubrange(lowerBound: 2, upperBound: 6) diff --git a/llvm/test/DebugInfo/fortranSubrangeExpr.ll b/llvm/test/DebugInfo/fortranSubrangeExpr.ll new file mode 100644 index 00000000000000..5ad5635cc6dc5d --- /dev/null +++ b/llvm/test/DebugInfo/fortranSubrangeExpr.ll @@ -0,0 +1,44 @@ +;; This test checks DISubrange 
bounds for DIExpression +; REQUIRES: x86_64-linux + +; RUN: %llc_dwarf %s -filetype=obj -o - | llvm-dwarfdump - | FileCheck %s + +;; Test whether bounds are generated correctly. +; CHECK-LABEL: DW_TAG_array_type +; CHECK: DW_TAG_subrange_type +; DW_AT_lower_bound (DW_OP_push_object_address, DW_OP_plus_uconst 0x50, DW_OP_deref) +; CHECK: DW_AT_lower_bound (DW_OP_push_object_address, DW_OP_plus_uconst 0x50, DW_OP_deref) +; CHECK-NEXT: DW_AT_upper_bound (DW_OP_push_object_address, DW_OP_plus_uconst 0x78, DW_OP_deref) +; CHECK-NEXT: DW_AT_byte_stride (DW_OP_push_object_address, DW_OP_plus_uconst 0x70, DW_OP_deref, DW_OP_plus_uconst 0x4, DW_OP_mul) + +; ModuleID = 'fortsubrange.modified.strategy3check-in.ll' +source_filename = "fortsubrange.ll" + +define void @MAIN_() !dbg !5 { +L.entry: + %"arr$sd1_349" = alloca [16 x i64], align 8 + call void @llvm.dbg.declare(metadata [16 x i64]* %"arr$sd1_349", metadata !8, metadata !DIExpression()), !dbg !13 + ret void, !dbg !14 +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.declare(metadata, metadata, metadata) + +!llvm.module.flags = !{!0, !1} +!llvm.dbg.cu = !{!2} + +!0 = !{i32 2, !"Dwarf Version", i32 4} +!1 = !{i32 2, !"Debug Info Version", i32 3} +!2 = distinct !DICompileUnit(language: DW_LANG_Fortran90, file: !3, producer: " F90 Flang - 1.5 2017-05-01", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !4, globals: !4, imports: !4) +!3 = !DIFile(filename: "fortsubrange.f90", directory: "/dir") +!4 = !{} +!5 = distinct !DISubprogram(name: "main", scope: !2, file: !3, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagMainSubprogram, unit: !2) +!6 = !DISubroutineType(cc: DW_CC_program, types: !7) +!7 = !{null} +!8 = !DILocalVariable(name: "arr", scope: !5, file: !3, type: !9) +!9 = !DICompositeType(tag: DW_TAG_array_type, baseType: !10, size: 32, align: 32, elements: !11) +!10 = !DIBasicType(name: "integer", size: 32, align: 
32, encoding: DW_ATE_signed) +!11 = !{!12} +!12 = !DISubrange(lowerBound: !DIExpression(DW_OP_push_object_address, DW_OP_plus_uconst, 80, DW_OP_deref), upperBound: !DIExpression(DW_OP_push_object_address, DW_OP_plus_uconst, 120, DW_OP_deref), stride: !DIExpression(DW_OP_push_object_address, DW_OP_plus_uconst, 112, DW_OP_deref, DW_OP_plus_uconst, 4, DW_OP_mul)) +!13 = !DILocation(line: 0, scope: !5) +!14 = !DILocation(line: 6, column: 1, scope: !5) diff --git a/llvm/test/DebugInfo/fortranSubrangeInt.ll b/llvm/test/DebugInfo/fortranSubrangeInt.ll new file mode 100644 index 00000000000000..34290b929cdb2c --- /dev/null +++ b/llvm/test/DebugInfo/fortranSubrangeInt.ll @@ -0,0 +1,43 @@ +;; This test checks DISubrange bounds for constants +; REQUIRES: x86_64-linux + +; RUN: %llc_dwarf %s -filetype=obj -o - | llvm-dwarfdump - | FileCheck %s + +;; Test whether bounds are generated correctly. +; CHECK-LABEL: DW_TAG_array_type +; CHECK: DW_TAG_subrange_type +; CHECK: DW_AT_lower_bound (-10) +; CHECK-NEXT: DW_AT_upper_bound (10) +; CHECK-NEXT: DW_AT_byte_stride (4) + +; ModuleID = 'fortsubrange.ll' +source_filename = "fortsubrange.ll" + +define void @MAIN_() !dbg !5 { +L.entry: + %"arr$sd1_349" = alloca [16 x i64], align 8 + call void @llvm.dbg.declare(metadata [16 x i64]* %"arr$sd1_349", metadata !8, metadata !DIExpression()), !dbg !13 + ret void, !dbg !14 +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.declare(metadata, metadata, metadata) + +!llvm.module.flags = !{!0, !1} +!llvm.dbg.cu = !{!2} + +!0 = !{i32 2, !"Dwarf Version", i32 4} +!1 = !{i32 2, !"Debug Info Version", i32 3} +!2 = distinct !DICompileUnit(language: DW_LANG_Fortran90, file: !3, producer: " F90 Flang - 1.5 2017-05-01", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !4, globals: !4, imports: !4) +!3 = !DIFile(filename: "fortsubrange.f90", directory: "/dir") +!4 = !{} +!5 = distinct !DISubprogram(name: "main", scope: !2, 
file: !3, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagMainSubprogram, unit: !2) +!6 = !DISubroutineType(cc: DW_CC_program, types: !7) +!7 = !{null} +!8 = !DILocalVariable(name: "arr", scope: !5, file: !3, type: !9) +!9 = !DICompositeType(tag: DW_TAG_array_type, baseType: !10, size: 32, align: 32, elements: !11) +!10 = !DIBasicType(name: "integer", size: 32, align: 32, encoding: DW_ATE_signed) +!11 = !{!12} +!12 = !DISubrange(lowerBound: -10, upperBound: 10, stride: 4) +!13 = !DILocation(line: 0, scope: !5) +!14 = !DILocation(line: 6, column: 1, scope: !5) diff --git a/llvm/test/DebugInfo/fortranSubrangeVar.ll b/llvm/test/DebugInfo/fortranSubrangeVar.ll new file mode 100644 index 00000000000000..5cc5f6075ff68f --- /dev/null +++ b/llvm/test/DebugInfo/fortranSubrangeVar.ll @@ -0,0 +1,63 @@ +;; This test checks DISubrange bounds for DIVariable +; REQUIRES: x86_64-linux + +; RUN: llc -mtriple=x86_64-unknown-linux-gnu %s -filetype=obj -o - | llvm-dwarfdump - | FileCheck %s + +;; Test whether bounds are generated correctly. 
+; CHECK: [[DIE1:0x.+]]: DW_TAG_variable +; CHECK: DW_AT_location +; CHECK-SAME: DW_OP_plus_uconst 0x70, DW_OP_deref, DW_OP_lit4, DW_OP_mul +; CHECK: [[DIE2:0x.+]]: DW_TAG_variable +; CHECK: DW_AT_location +; CHECK-SAME: DW_OP_plus_uconst 0x78 +; CHECK: [[DIE3:0x.+]]: DW_TAG_variable +; CHECK: DW_AT_location +; CHECK-SAME: DW_OP_plus_uconst 0x50 +; CHECK: DW_TAG_subrange_type +; CHECK: DW_AT_lower_bound ([[DIE3]]) +; CHECK-NEXT: DW_AT_upper_bound ([[DIE2]]) +; CHECK-NEXT: DW_AT_byte_stride ([[DIE1]]) + + +; ModuleID = 'fortsubrange.ll' +source_filename = "fortsubrange.ll" + +define void @MAIN_() !dbg !5 { +L.entry: + %.Z0640_333 = alloca i32*, align 8 + %"arr$sd1_349" = alloca [16 x i64], align 8 + call void @llvm.dbg.declare(metadata i32** %.Z0640_333, metadata !8, metadata !DIExpression(DW_OP_deref)), !dbg !17 + call void @llvm.dbg.declare(metadata [16 x i64]* %"arr$sd1_349", metadata !13, metadata !DIExpression(DW_OP_plus_uconst, 80)), !dbg !17 + call void @llvm.dbg.value(metadata [16 x i64]* %"arr$sd1_349", metadata !16, metadata !DIExpression(DW_OP_plus_uconst, 112, DW_OP_deref, DW_OP_constu, 4, DW_OP_mul)), !dbg !17 + call void @llvm.dbg.declare(metadata [16 x i64]* %"arr$sd1_349", metadata !15, metadata !DIExpression(DW_OP_plus_uconst, 120)), !dbg !17 + ret void, !dbg !18 +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.declare(metadata, metadata, metadata) + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.module.flags = !{!0, !1} +!llvm.dbg.cu = !{!2} + +!0 = !{i32 2, !"Dwarf Version", i32 4} +!1 = !{i32 2, !"Debug Info Version", i32 3} +!2 = distinct !DICompileUnit(language: DW_LANG_Fortran90, file: !3, producer: " F90 Flang - 1.5 2017-05-01", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !4, globals: !4, imports: !4) +!3 = !DIFile(filename: "fortsubrange.f90", directory: "/dir") +!4 = !{} +!5 
= distinct !DISubprogram(name: "main", scope: !2, file: !3, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagMainSubprogram, unit: !2) +!6 = !DISubroutineType(cc: DW_CC_program, types: !7) +!7 = !{null} +!8 = !DILocalVariable(name: "arr", scope: !5, file: !3, type: !9) +!9 = !DICompositeType(tag: DW_TAG_array_type, baseType: !10, size: 32, align: 32, elements: !11) +!10 = !DIBasicType(name: "integer", size: 32, align: 32, encoding: DW_ATE_signed) +!11 = !{!12} +!12 = !DISubrange(lowerBound: !13, upperBound: !15, stride: !16) +!13 = distinct !DILocalVariable(scope: !5, file: !3, type: !14, flags: DIFlagArtificial) +!14 = !DIBasicType(name: "integer*8", size: 64, align: 64, encoding: DW_ATE_signed) +!15 = distinct !DILocalVariable(scope: !5, file: !3, type: !14, flags: DIFlagArtificial) +!16 = distinct !DILocalVariable(scope: !5, file: !3, type: !14, flags: DIFlagArtificial) +!17 = !DILocation(line: 0, scope: !5) +!18 = !DILocation(line: 6, column: 1, scope: !5) diff --git a/llvm/test/FileCheck/numeric-expression.txt b/llvm/test/FileCheck/numeric-expression.txt index 3d33e64a0a9ed3..d5b4db7d30ea0b 100644 --- a/llvm/test/FileCheck/numeric-expression.txt +++ b/llvm/test/FileCheck/numeric-expression.txt @@ -19,8 +19,9 @@ REDEF NO SPC // CHECK-LABEL: REDEF ; Numeric variable definition with explicit matching format. DEF FMT // CHECK-LABEL: DEF FMT -c // CHECK-NEXT: {{^}}[[#%x,LHEX:]] -D // CHECK-NEXT: {{^}}[[#%X,UHEX:]] +c // CHECK-NEXT: {{^}}[[#%x,LHEX:]] +D // CHECK-NEXT: {{^}}[[#%X,UHEX:]] +-30 // CHECK-NEXT: {{^}}[[#%d,SIGN:]] ; Numeric variable definition with explicit matching format with different ; spacing. 
@@ -64,6 +65,10 @@ E // CHECK-NEXT: {{^}}[[#%X,UHEX+1]] C // CHECK-NEXT: {{^}}[[#%X,UHEX-1]] 1B // CHECK-NEXT: {{^}}[[#%X,UHEX+0xe]] 1B // CHECK-NEXT: {{^}}[[#%X,UHEX+0xE]] +-30 // CHECK-NEXT: {{^}}[[#%d,SIGN]] +-29 // CHECK-NEXT: {{^}}[[#%d,SIGN+1]] +-31 // CHECK-NEXT: {{^}}[[#%d,SIGN-1]] +42 // CHECK-NEXT: {{^}}[[#%d,SIGN+72]] 11 // CHECK-NEXT: {{^}}[[#%u,UNSIa]] 11 // CHECK-NEXT: {{^}}[[#%u,UNSIb]] 11 // CHECK-NEXT: {{^}}[[#%u,UNSIc]] @@ -104,6 +109,9 @@ E // CHECK-NEXT: {{^}}[[#UHEX+1]] C // CHECK-NEXT: {{^}}[[#UHEX-1]] 1B // CHECK-NEXT: {{^}}[[#UHEX+0xe]] 1B // CHECK-NEXT: {{^}}[[#UHEX+0xE]] +-30 // CHECK-NEXT: {{^}}[[#SIGN]] +-29 // CHECK-NEXT: {{^}}[[#SIGN+1]] +-31 // CHECK-NEXT: {{^}}[[#SIGN-1]] ; Numeric expressions using variables defined on other lines and an immediate ; interpreted as an unsigned value. @@ -118,10 +126,16 @@ CHECK-NEXT: [[#UNSI+0x8000000000000000]] USE CONV FMT IMPL MATCH // CHECK-LABEL: USE CONV FMT IMPL MATCH b // CHECK-NEXT: {{^}}[[# %x, UNSI]] B // CHECK-NEXT: {{^}}[[# %X, UNSI]] +-1 // CHECK-NEXT: {{^}}[[# %d, UNSI-12]] 12 // CHECK-NEXT: {{^}}[[# %u, LHEX]] C // CHECK-NEXT: {{^}}[[# %X, LHEX]] +-2 // CHECK-NEXT: {{^}}[[# %d, LHEX-14]] 13 // CHECK-NEXT: {{^}}[[# %u, UHEX]] d // CHECK-NEXT: {{^}}[[# %x, UHEX]] +-5 // CHECK-NEXT: {{^}}[[# %d, UHEX-18]] +15 // CHECK-NEXT: {{^}}[[# %u, SIGN+45]] +f // CHECK-NEXT: {{^}}[[# %x, SIGN+45]] +F // CHECK-NEXT: {{^}}[[# %X, SIGN+45]] ; Conflicting implicit format. RUN: %ProtectFileCheckOutput \ @@ -329,3 +343,27 @@ REDEF-NEW-FMT-NEXT: [[#%X,UNSI:]] REDEF-NEW-FMT-MSG: numeric-expression.txt:[[#@LINE-1]]:31: error: format different from previous variable definition REDEF-NEW-FMT-MSG-NEXT: {{R}}EDEF-NEW-FMT-NEXT: {{\[\[#%X,UNSI:\]\]}} REDEF-NEW-FMT-MSG-NEXT: {{^}} ^{{$}} + +; Numeric expression with overflow. 
+RUN: not FileCheck --check-prefix OVERFLOW --input-file %s %s 2>&1 \ +RUN: | FileCheck --check-prefix OVERFLOW-MSG --strict-whitespace %s + +OVERFLOW +BIGVAR=10000000000000000 +OVERFLOW-LABEL: OVERFLOW +OVERFLOW-NEXT: BIGVAR: [[#BIGVAR:0x8000000000000000+0x8000000000000000]] +OVERFLOW-MSG: numeric-expression.txt:[[#@LINE-1]]:27: error: unable to substitute variable or numeric expression +OVERFLOW-MSG-NEXT: {{O}}VERFLOW-NEXT: BIGVAR: {{\[\[#BIGVAR:0x8000000000000000\+0x8000000000000000\]\]}} +OVERFLOW-MSG-NEXT: {{^}} ^{{$}} + +; Numeric expression with underflow. +RUN: not FileCheck --check-prefix UNDERFLOW --input-file %s %s 2>&1 \ +RUN: | FileCheck --check-prefix UNDERFLOW-MSG --strict-whitespace %s + +UNDERFLOW +TINYVAR=-10000000000000000 +UNDERFLOW-LABEL: UNDERFLOW +UNDERFLOW-NEXT: TINYVAR: [[#%d,TINYVAR:-0x8000000000000000-0x8000000000000000]] +UNDERFLOW-MSG: numeric-expression.txt:[[#@LINE-1]]:29: error: unable to substitute variable or numeric expression +UNDERFLOW-MSG-NEXT: {{U}}NDERFLOW-NEXT: TINYVAR: {{\[\[#%d,TINYVAR:-0x8000000000000000-0x8000000000000000\]\]}} +UNDERFLOW-MSG-NEXT: {{^}} ^{{$}} diff --git a/llvm/test/MC/AMDGPU/mai.s b/llvm/test/MC/AMDGPU/mai.s index 76aa534bdef65b..09eddb0d258c8c 100644 --- a/llvm/test/MC/AMDGPU/mai.s +++ b/llvm/test/MC/AMDGPU/mai.s @@ -1,4 +1,5 @@ -// RUN: llvm-mc -arch=amdgcn -mcpu=gfx908 -show-encoding %s | FileCheck -check-prefix=GFX908 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 -show-encoding %s | FileCheck -check-prefix=GFX908 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx908 -show-encoding %s 2>&1 | FileCheck -check-prefix=NOGFX908 %s v_accvgpr_read_b32 v2, a0 // GFX908: v_accvgpr_read_b32 v2, a0 ; encoding: [0x02,0x00,0xd8,0xd3,0x00,0x01,0x00,0x08] @@ -24,6 +25,24 @@ v_accvgpr_write_b32 a2, v1 v_accvgpr_write a2, v255 // GFX908: v_accvgpr_write_b32 a2, v255 ; encoding: [0x02,0x00,0xd9,0xd3,0xff,0x01,0x00,0x00] +v_accvgpr_write a2, 100 +// NOGFX908: error: invalid operand for instruction + +v_accvgpr_write 
a2, execz +// NOGFX908: error: source operand must be either a VGPR or an inline constant + +v_accvgpr_write a2, vccz +// NOGFX908: error: source operand must be either a VGPR or an inline constant + +v_accvgpr_write a2, scc +// NOGFX908: error: source operand must be either a VGPR or an inline constant + +v_accvgpr_write a2, shared_base +// NOGFX908: error: source operand must be either a VGPR or an inline constant + +v_accvgpr_write a2, pops_exiting_wave_id +// NOGFX908: error: source operand must be either a VGPR or an inline constant + v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[1:32] // GFX908: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[1:32] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x06,0x04] diff --git a/llvm/test/ObjectYAML/MachO/fat_macho_i386_x86_64.yaml b/llvm/test/ObjectYAML/MachO/fat_macho_i386_x86_64.yaml index 55a9df3636d694..b9b2f2d629e2df 100644 --- a/llvm/test/ObjectYAML/MachO/fat_macho_i386_x86_64.yaml +++ b/llvm/test/ObjectYAML/MachO/fat_macho_i386_x86_64.yaml @@ -1,4 +1,9 @@ -# RUN: yaml2obj %s | obj2yaml | FileCheck %s +## This file contains test cases for generating Fat Mach-O binaries. + +## a) Test that yaml2obj emits Fat Mach-O binary and obj2yaml converts it +## back to YAML file. + +# RUN: yaml2obj --docnum=1 %s | obj2yaml | FileCheck %s --- !fat-mach-o FatHeader: @@ -72,3 +77,39 @@ Slices: #CHECK: flags: 0x00218085 #CHECK: reserved: 0x00000000 #CHECK: ... + +## b) Test that yaml2obj emits an error message if the number of 'FatArchs' is less than +## the number of 'Slices'. + +# RUN: not yaml2obj --docnum=2 %s -o %t2.fat-macho 2>&1 | FileCheck %s --check-prefix=ERROR + +# ERROR: yaml2obj: error: cannot write 'Slices' if not described in 'FatArches' + +--- !fat-mach-o +FatHeader: + magic: 0xCAFEBABE + nfat_arch: 2 +FatArchs: + ## 2 FatArchs are expected. 
+ - cputype: 0x00000007 + cpusubtype: 0x00000003 + offset: 0x0000000000001000 + size: 0 + align: 0 +Slices: + - FileHeader: + magic: 0xFEEDFACE + cputype: 0x00000007 + cpusubtype: 0x00000003 + filetype: 0x00000002 + ncmds: 0 + sizeofcmds: 0 + flags: 0x00000000 + - FileHeader: + magic: 0xFEEDFACE + cputype: 0x00000007 + cpusubtype: 0x00000003 + filetype: 0x00000002 + ncmds: 0 + sizeofcmds: 0 + flags: 0x00000000 diff --git a/llvm/test/ObjectYAML/MachO/sections.yaml b/llvm/test/ObjectYAML/MachO/sections.yaml index 5da789dbdef7b1..f8c5370ecc37cf 100644 --- a/llvm/test/ObjectYAML/MachO/sections.yaml +++ b/llvm/test/ObjectYAML/MachO/sections.yaml @@ -1,4 +1,8 @@ -# RUN: yaml2obj %s | obj2yaml | FileCheck %s +## This file contains test cases for generating sections in Mach-O object files. + +## a) Test that yaml2obj emits sections and obj2yaml converts them back. + +# RUN: yaml2obj --docnum=1 %s | obj2yaml | FileCheck %s --- !mach-o FileHeader: @@ -281,3 +285,58 @@ LoadCommands: #CHECK: segname: __DATA #CHECK: - sectname: __la_symbol_ptr #CHECK: segname: __DATA + +## b) Test that yaml2obj emits an error message if we specify an offset that +## makes the current section and the previous one overlap. 
+ +# RUN: not yaml2obj --docnum=2 %s -o %t2.macho 2>&1 | FileCheck %s --check-prefix=OVERLAP + +# OVERLAP: yaml2obj: error: wrote too much data somewhere, section offsets don't line up + +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x80000003 + filetype: 0x00000002 + ncmds: 1 + sizeofcmds: 1024 + flags: 0x00000000 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 0xff + segname: __SEC + vmaddr: 0 + vmsize: 0 + fileoff: 0 + filesize: 0 + maxprot: 0 + initprot: 0 + nsects: 2 + flags: 0 + Sections: + - sectname: __sec1 + segname: __SEC + addr: 0x0000000000000000 + size: 2 + offset: 0x00000000 + align: 0 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + - sectname: __sec2 + segname: __SEC + addr: 0x0000000000000000 + size: 2 + offset: 0x00000001 ## Specify an offset that makes __sec1 and __sec2 overlap. + align: 1 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 diff --git a/llvm/test/TableGen/AliasAsmString.td b/llvm/test/TableGen/AliasAsmString.td new file mode 100644 index 00000000000000..dedcc4b2af89ca --- /dev/null +++ b/llvm/test/TableGen/AliasAsmString.td @@ -0,0 +1,28 @@ +// RUN: llvm-tblgen -gen-asm-writer -I %p/../../include %s | FileCheck %s + +include "llvm/Target/Target.td" + +def ArchInstrInfo : InstrInfo { } + +def Arch : Target { + let InstructionSet = ArchInstrInfo; +} + +def Reg : Register<"reg">; + +def RegClass : RegisterClass<"foo", [i32], 0, (add Reg)>; + +def IntOperand: Operand; + +def foo : Instruction { + let Size = 2; + let OutOperandList = (outs); + let InOperandList = (ins IntOperand:$imm); + let AsmString = "foo $imm"; + let Namespace = "Arch"; +} + +def FooBraces : InstAlias<"foo \\{$imm\\}", (foo IntOperand:$imm)>; + +// CHECK: static const char AsmStrings[] = +// CHECK-NEXT: /* 0 */ "foo {$\x01}\0" diff --git 
a/llvm/test/TableGen/GlobalISelEmitter-SDNodeXForm-timm.td b/llvm/test/TableGen/GlobalISelEmitter-SDNodeXForm-timm.td index afc967f3f78dcb..0a12cc0bf85603 100644 --- a/llvm/test/TableGen/GlobalISelEmitter-SDNodeXForm-timm.td +++ b/llvm/test/TableGen/GlobalISelEmitter-SDNodeXForm-timm.td @@ -11,8 +11,8 @@ def gi_shiftl_1 : GICustomOperandRenderer<"renderShiftImml1">, GISDNodeXFormEquiv; -def int_mytarget_sleep : Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]>; -def int_mytarget_foo : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [ImmArg<1>, IntrNoMem]>; +def int_mytarget_sleep : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; +def int_mytarget_foo : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [ImmArg>, IntrNoMem]>; def SLEEP : I<(outs), (ins i32imm:$src0), []>; diff --git a/llvm/test/TableGen/GlobalISelEmitter-immarg-literal-pattern.td b/llvm/test/TableGen/GlobalISelEmitter-immarg-literal-pattern.td index a87e46a837347e..2f39bf49af4d57 100644 --- a/llvm/test/TableGen/GlobalISelEmitter-immarg-literal-pattern.td +++ b/llvm/test/TableGen/GlobalISelEmitter-immarg-literal-pattern.td @@ -3,7 +3,7 @@ include "llvm/Target/Target.td" include "GlobalISelEmitterCommon.td" -def int_mytarget_sleep : Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]>; +def int_mytarget_sleep : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; def G_TGT_CAT : MyTargetGenericInstruction { let OutOperandList = (outs type0:$dst); diff --git a/llvm/test/TableGen/immarg.td b/llvm/test/TableGen/immarg.td index 407f06c3a40ecc..c6f03cad137fa9 100644 --- a/llvm/test/TableGen/immarg.td +++ b/llvm/test/TableGen/immarg.td @@ -4,8 +4,8 @@ include "llvm/Target/Target.td" include "GlobalISelEmitterCommon.td" let TargetPrefix = "mytarget" in { -def int_mytarget_sleep0 : Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]>; -def int_mytarget_sleep1 : Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]>; +def int_mytarget_sleep0 : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; +def int_mytarget_sleep1 : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; } // GISEL: 
GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS, diff --git a/llvm/test/Transforms/CanonicalizeFreezeInLoops/aarch64.ll b/llvm/test/Transforms/CanonicalizeFreezeInLoops/aarch64.ll index c41d044c26bf87..3b2f98335652ef 100644 --- a/llvm/test/Transforms/CanonicalizeFreezeInLoops/aarch64.ll +++ b/llvm/test/Transforms/CanonicalizeFreezeInLoops/aarch64.ll @@ -7,13 +7,12 @@ define void @f(i8* %p, i32 %n, i32 %m) { ; CHECK-LABEL: f: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: add w8, w2, #1 // =1 ; CHECK-NEXT: .LBB0_1: // %loop ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add w9, w2, w8 -; CHECK-NEXT: cmp w8, w1 +; CHECK-NEXT: strb wzr, [x0, w8, sxtw] +; CHECK-NEXT: subs w1, w1, #1 // =1 ; CHECK-NEXT: add w8, w8, #1 // =1 -; CHECK-NEXT: strb wzr, [x0, w9, sxtw] ; CHECK-NEXT: b.ne .LBB0_1 ; CHECK-NEXT: // %bb.2: // %exit ; CHECK-NEXT: ret diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll index 932dcf8e56271f..f444404d14d0e9 100644 --- a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll +++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll @@ -672,3 +672,64 @@ bb: %tmp2 = extractelement <4 x i32*> %tmp, i64 0 ret i32* %tmp2 } + +; The non-zero elements of the result are always 'min', so the splat is unnecessary. 
+ +define <4 x i8> @select_cond_with_eq_true_false_elts(<4 x i8> %x, <4 x i8> %y, <4 x i1> %cmp) { +; CHECK-LABEL: @select_cond_with_eq_true_false_elts( +; CHECK-NEXT: [[TVAL:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x i1> [[CMP:%.*]], <4 x i1> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[SPLAT]], <4 x i8> [[TVAL]], <4 x i8> [[Y]] +; CHECK-NEXT: ret <4 x i8> [[R]] +; + %tval = shufflevector <4 x i8> %x, <4 x i8> %y, <4 x i32> + %splat = shufflevector <4 x i1> %cmp, <4 x i1> undef, <4 x i32> zeroinitializer + %r = select <4 x i1> %splat, <4 x i8> %tval, <4 x i8> %y + ret <4 x i8> %r +} + +; First element of the result is always x[0], so first element of select condition is unnecessary. + +define <4 x i8> @select_cond_with_eq_true_false_elts2(<4 x i8> %x, <4 x i8> %y, <4 x i1> %cmp) { +; CHECK-LABEL: @select_cond_with_eq_true_false_elts2( +; CHECK-NEXT: [[TVAL:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[COND:%.*]] = shufflevector <4 x i1> [[CMP:%.*]], <4 x i1> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[COND]], <4 x i8> [[TVAL]], <4 x i8> [[X]] +; CHECK-NEXT: ret <4 x i8> [[R]] +; + %tval = shufflevector <4 x i8> %x, <4 x i8> %y, <4 x i32> + %cond = shufflevector <4 x i1> %cmp, <4 x i1> undef, <4 x i32> + %r = select <4 x i1> %cond, <4 x i8> %tval, <4 x i8> %x + ret <4 x i8> %r +} + +; Second element of the result is always x[3], so second element of select condition is unnecessary. +; Fourth element of the result is always undef, so fourth element of select condition is unnecessary. 
+ +define <4 x float> @select_cond_with_eq_true_false_elts3(<4 x float> %x, <4 x float> %y, <4 x i1> %cmp) { +; CHECK-LABEL: @select_cond_with_eq_true_false_elts3( +; CHECK-NEXT: [[TVAL:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[FVAL:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[X]], <4 x i32> +; CHECK-NEXT: [[COND:%.*]] = shufflevector <4 x i1> [[CMP:%.*]], <4 x i1> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[COND]], <4 x float> [[TVAL]], <4 x float> [[FVAL]] +; CHECK-NEXT: ret <4 x float> [[R]] +; + %tval = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> + %fval = shufflevector <4 x float> %y, <4 x float> %x, <4 x i32> + %cond = shufflevector <4 x i1> %cmp, <4 x i1> undef, <4 x i32> + %r = select <4 x i1> %cond, <4 x float> %tval, <4 x float> %fval + ret <4 x float> %r +} + +define <4 x i8> @select_cond_with_undef_true_false_elts(<4 x i8> %x, <4 x i8> %y, <4 x i1> %cmp) { +; CHECK-LABEL: @select_cond_with_undef_true_false_elts( +; CHECK-NEXT: [[TVAL:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> undef, <4 x i32> +; CHECK-NEXT: [[COND:%.*]] = shufflevector <4 x i1> [[CMP:%.*]], <4 x i1> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[COND]], <4 x i8> [[TVAL]], <4 x i8> [[X:%.*]] +; CHECK-NEXT: ret <4 x i8> [[R]] +; + %tval = shufflevector <4 x i8> %x, <4 x i8> %y, <4 x i32> + %cond = shufflevector <4 x i1> %cmp, <4 x i1> undef, <4 x i32> + %r = select <4 x i1> %cond, <4 x i8> %tval, <4 x i8> %x + ret <4 x i8> %r +} diff --git a/llvm/test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll b/llvm/test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll index 48110e3283cfb6..4e0346c14c3452 100644 --- a/llvm/test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll +++ b/llvm/test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll @@ -1,20 +1,112 @@ ; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s ; PR7328 ; PR7506 
-define i32 @foo(i32 %x) { -; CHECK-LABEL: define i32 @foo( -; CHECK: %accumulator.tr = phi i32 [ 1, %entry ], [ 0, %body ] +define i32 @test1_constants(i32 %x) { entry: %cond = icmp ugt i32 %x, 0 ; [#uses=1] br i1 %cond, label %return, label %body body: ; preds = %entry %y = add i32 %x, 1 ; [#uses=1] - %tmp = call i32 @foo(i32 %y) ; [#uses=0] -; CHECK-NOT: call + %recurse = call i32 @test1_constants(i32 %y) ; [#uses=0] ret i32 0 -; CHECK: ret i32 %accumulator.tr return: ; preds = %entry ret i32 1 } + +; CHECK-LABEL: define i32 @test1_constants( +; CHECK: tailrecurse: +; CHECK: %ret.tr = phi i32 [ undef, %entry ], [ %current.ret.tr, %body ] +; CHECK: %ret.known.tr = phi i1 [ false, %entry ], [ true, %body ] +; CHECK: body: +; CHECK-NOT: %recurse +; CHECK: %current.ret.tr = select i1 %ret.known.tr, i32 %ret.tr, i32 0 +; CHECK-NOT: ret +; CHECK: return: +; CHECK: %current.ret.tr1 = select i1 %ret.known.tr, i32 %ret.tr, i32 1 +; CHECK: ret i32 %current.ret.tr1 + +define i32 @test2_non_constants(i32 %x) { +entry: + %cond = icmp ugt i32 %x, 0 + br i1 %cond, label %return, label %body + +body: + %y = add i32 %x, 1 + %helper1 = call i32 @test2_helper() + %recurse = call i32 @test2_non_constants(i32 %y) + ret i32 %helper1 + +return: + %helper2 = call i32 @test2_helper() + ret i32 %helper2 +} + +declare i32 @test2_helper() + +; CHECK-LABEL: define i32 @test2_non_constants( +; CHECK: tailrecurse: +; CHECK: %ret.tr = phi i32 [ undef, %entry ], [ %current.ret.tr, %body ] +; CHECK: %ret.known.tr = phi i1 [ false, %entry ], [ true, %body ] +; CHECK: body: +; CHECK-NOT: %recurse +; CHECK: %current.ret.tr = select i1 %ret.known.tr, i32 %ret.tr, i32 %helper1 +; CHECK-NOT: ret +; CHECK: return: +; CHECK: %current.ret.tr1 = select i1 %ret.known.tr, i32 %ret.tr, i32 %helper2 +; CHECK: ret i32 %current.ret.tr1 + +define i32 @test3_mixed(i32 %x) { +entry: + switch i32 %x, label %default [ + i32 0, label %case0 + i32 1, label %case1 + i32 2, label %case2 + ] + +case0: + %helper1 = call 
i32 @test3_helper() + br label %return + +case1: + %y1 = add i32 %x, -1 + %recurse1 = call i32 @test3_mixed(i32 %y1) + br label %return + +case2: + %y2 = add i32 %x, -1 + %helper2 = call i32 @test3_helper() + %recurse2 = call i32 @test3_mixed(i32 %y2) + br label %return + +default: + %y3 = urem i32 %x, 3 + %recurse3 = call i32 @test3_mixed(i32 %y3) + br label %return + +return: + %retval = phi i32 [ %recurse3, %default ], [ %helper2, %case2 ], [ 9, %case1 ], [ %helper1, %case0 ] + ret i32 %retval +} + +declare i32 @test3_helper() + +; CHECK-LABEL: define i32 @test3_mixed( +; CHECK: tailrecurse: +; CHECK: %ret.tr = phi i32 [ undef, %entry ], [ %current.ret.tr, %case1 ], [ %current.ret.tr1, %case2 ], [ %ret.tr, %default ] +; CHECK: %ret.known.tr = phi i1 [ false, %entry ], [ true, %case1 ], [ true, %case2 ], [ %ret.known.tr, %default ] +; CHECK: case1: +; CHECK-NOT: %recurse +; CHECK: %current.ret.tr = select i1 %ret.known.tr, i32 %ret.tr, i32 9 +; CHECK: br label %tailrecurse +; CHECK: case2: +; CHECK-NOT: %recurse +; CHECK: %current.ret.tr1 = select i1 %ret.known.tr, i32 %ret.tr, i32 %helper2 +; CHECK: br label %tailrecurse +; CHECK: default: +; CHECK-NOT: %recurse +; CHECK: br label %tailrecurse +; CHECK: return: +; CHECK: %current.ret.tr2 = select i1 %ret.known.tr, i32 %ret.tr, i32 %helper1 +; CHECK: ret i32 %current.ret.tr2 diff --git a/llvm/test/Transforms/TailCallElim/basic.ll b/llvm/test/Transforms/TailCallElim/basic.ll index 576f2fec1244f6..6116014a024b19 100644 --- a/llvm/test/Transforms/TailCallElim/basic.ll +++ b/llvm/test/Transforms/TailCallElim/basic.ll @@ -46,8 +46,16 @@ endif.0: ; preds = %entry ; plunked it into the demo script, so maybe they care about it. 
define i32 @test3(i32 %c) { ; CHECK: i32 @test3 +; CHECK: tailrecurse: +; CHECK: %ret.tr = phi i32 [ undef, %entry ], [ %current.ret.tr, %else ] +; CHECK: %ret.known.tr = phi i1 [ false, %entry ], [ true, %else ] +; CHECK: else: ; CHECK-NOT: call -; CHECK: ret i32 0 +; CHECK: %current.ret.tr = select i1 %ret.known.tr, i32 %ret.tr, i32 0 +; CHECK-NOT: ret +; CHECK: return: +; CHECK: %current.ret.tr1 = select i1 %ret.known.tr, i32 %ret.tr, i32 0 +; CHECK: ret i32 %current.ret.tr1 entry: %tmp.1 = icmp eq i32 %c, 0 ; [#uses=1] br i1 %tmp.1, label %return, label %else diff --git a/llvm/test/Verifier/disubrange-count-upperBound.ll b/llvm/test/Verifier/disubrange-count-upperBound.ll new file mode 100644 index 00000000000000..3dbc79004f022f --- /dev/null +++ b/llvm/test/Verifier/disubrange-count-upperBound.ll @@ -0,0 +1,5 @@ +; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s + +!named = !{!0} +; CHECK: Subrange can have any one of count or upperBound +!0 = !DISubrange(count: 20, lowerBound: 1, upperBound: 10) diff --git a/llvm/test/Verifier/disubrange-missing-upperBound.ll b/llvm/test/Verifier/disubrange-missing-upperBound.ll new file mode 100644 index 00000000000000..26b707caa6093c --- /dev/null +++ b/llvm/test/Verifier/disubrange-missing-upperBound.ll @@ -0,0 +1,5 @@ +; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s + +!named = !{!0} +; CHECK: Subrange must contain count or upperBound +!0 = !DISubrange(lowerBound: 1, stride: 4) diff --git a/llvm/test/Verifier/invalid-disubrange-lowerBound.ll b/llvm/test/Verifier/invalid-disubrange-lowerBound.ll new file mode 100644 index 00000000000000..37a449a832908a --- /dev/null +++ b/llvm/test/Verifier/invalid-disubrange-lowerBound.ll @@ -0,0 +1,6 @@ +; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s + +!named = !{!0, !1} +; CHECK: LowerBound must be signed constant or DIVariable or DIExpression +!0 = !DISubrange(lowerBound: !1, upperBound: 1) +!1 = !DIBasicType(name: "integer*8", size: 64, align: 64, 
encoding: DW_ATE_signed) diff --git a/llvm/test/Verifier/invalid-disubrange-stride.ll b/llvm/test/Verifier/invalid-disubrange-stride.ll new file mode 100644 index 00000000000000..eae6b625911e42 --- /dev/null +++ b/llvm/test/Verifier/invalid-disubrange-stride.ll @@ -0,0 +1,6 @@ +; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s + +!named = !{!0, !1} +; CHECK: Stride must be signed constant or DIVariable or DIExpression +!0 = !DISubrange(upperBound: 1, stride: !1) +!1 = !DIBasicType(name: "integer*8", size: 64, align: 64, encoding: DW_ATE_signed) diff --git a/llvm/test/Verifier/invalid-disubrange-upperBound.ll b/llvm/test/Verifier/invalid-disubrange-upperBound.ll new file mode 100644 index 00000000000000..d4daa6ba7e1e2c --- /dev/null +++ b/llvm/test/Verifier/invalid-disubrange-upperBound.ll @@ -0,0 +1,6 @@ +; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s + +!named = !{!0, !1} +; CHECK: UpperBound must be signed constant or DIVariable or DIExpression +!0 = !DISubrange(lowerBound: 1, upperBound: !1) +!1 = !DIBasicType(name: "integer*8", size: 64, align: 64, encoding: DW_ATE_signed) diff --git a/llvm/test/tools/llvm-readobj/ELF/file-header-machine-types.test b/llvm/test/tools/llvm-readobj/ELF/file-header-machine-types.test new file mode 100644 index 00000000000000..a497faacf8b3c5 --- /dev/null +++ b/llvm/test/tools/llvm-readobj/ELF/file-header-machine-types.test @@ -0,0 +1,487 @@ +## Show that all machine codes are correctly printed. 
+ +# RUN: yaml2obj %s -o %t.none.o -D MACHINE=EM_NONE +# RUN: llvm-readelf --file-headers %t.none.o | FileCheck %s -DMACHINE="None" + +# RUN: yaml2obj %s -o %t.m32.o -D MACHINE=EM_M32 +# RUN: llvm-readelf --file-headers %t.m32.o | FileCheck %s -DMACHINE="WE32100" + +# RUN: yaml2obj %s -o %t.sparc.o -D MACHINE=EM_SPARC +# RUN: llvm-readelf --file-headers %t.sparc.o | FileCheck %s -DMACHINE="Sparc" + +# RUN: yaml2obj %s -o %t.386.o -D MACHINE=EM_386 +# RUN: llvm-readelf --file-headers %t.386.o | FileCheck %s -DMACHINE="Intel 80386" + +# RUN: yaml2obj %s -o %t.68k.o -D MACHINE=EM_68K +# RUN: llvm-readelf --file-headers %t.68k.o | FileCheck %s -DMACHINE="MC68000" + +# RUN: yaml2obj %s -o %t.88k.o -D MACHINE=EM_88K +# RUN: llvm-readelf --file-headers %t.88k.o | FileCheck %s -DMACHINE="MC88000" + +# RUN: yaml2obj %s -o %t.iamcu.o -D MACHINE=EM_IAMCU +# RUN: llvm-readelf --file-headers %t.iamcu.o | FileCheck %s -DMACHINE="EM_IAMCU" + +# RUN: yaml2obj %s -o %t.860.o -D MACHINE=EM_860 +# RUN: llvm-readelf --file-headers %t.860.o | FileCheck %s -DMACHINE="Intel 80860" + +# RUN: yaml2obj %s -o %t.mips.o -D MACHINE=EM_MIPS +# RUN: llvm-readelf --file-headers %t.mips.o | FileCheck %s -DMACHINE="MIPS R3000" + +# RUN: yaml2obj %s -o %t.s370.o -D MACHINE=EM_S370 +# RUN: llvm-readelf --file-headers %t.s370.o | FileCheck %s -DMACHINE="IBM System/370" + +# RUN: yaml2obj %s -o %t.mips_rs3_le.o -D MACHINE=EM_MIPS_RS3_LE +# RUN: llvm-readelf --file-headers %t.mips_rs3_le.o | FileCheck %s -DMACHINE="MIPS R3000 little-endian" + +# RUN: yaml2obj %s -o %t.parisc.o -D MACHINE=EM_PARISC +# RUN: llvm-readelf --file-headers %t.parisc.o | FileCheck %s -DMACHINE="HPPA" + +# RUN: yaml2obj %s -o %t.vpp500.o -D MACHINE=EM_VPP500 +# RUN: llvm-readelf --file-headers %t.vpp500.o | FileCheck %s -DMACHINE="Fujitsu VPP500" + +# RUN: yaml2obj %s -o %t.sparc32plus.o -D MACHINE=EM_SPARC32PLUS +# RUN: llvm-readelf --file-headers %t.sparc32plus.o | FileCheck %s -DMACHINE="Sparc v8+" + +# RUN: yaml2obj %s -o 
%t.960.o -D MACHINE=EM_960 +# RUN: llvm-readelf --file-headers %t.960.o | FileCheck %s -DMACHINE="Intel 80960" + +# RUN: yaml2obj %s -o %t.ppc.o -D MACHINE=EM_PPC +# RUN: llvm-readelf --file-headers %t.ppc.o | FileCheck %s -DMACHINE="PowerPC" + +# RUN: yaml2obj %s -o %t.ppc64.o -D MACHINE=EM_PPC64 +# RUN: llvm-readelf --file-headers %t.ppc64.o | FileCheck %s -DMACHINE="PowerPC64" + +# RUN: yaml2obj %s -o %t.s390.o -D MACHINE=EM_S390 +# RUN: llvm-readelf --file-headers %t.s390.o | FileCheck %s -DMACHINE="IBM S/390" + +# RUN: yaml2obj %s -o %t.spu.o -D MACHINE=EM_SPU +# RUN: llvm-readelf --file-headers %t.spu.o | FileCheck %s -DMACHINE="SPU" + +# RUN: yaml2obj %s -o %t.v800.o -D MACHINE=EM_V800 +# RUN: llvm-readelf --file-headers %t.v800.o | FileCheck %s -DMACHINE="NEC V800 series" + +# RUN: yaml2obj %s -o %t.fr20.o -D MACHINE=EM_FR20 +# RUN: llvm-readelf --file-headers %t.fr20.o | FileCheck %s -DMACHINE="Fujistsu FR20" + +# RUN: yaml2obj %s -o %t.rh32.o -D MACHINE=EM_RH32 +# RUN: llvm-readelf --file-headers %t.rh32.o | FileCheck %s -DMACHINE="TRW RH-32" + +# RUN: yaml2obj %s -o %t.rce.o -D MACHINE=EM_RCE +# RUN: llvm-readelf --file-headers %t.rce.o | FileCheck %s -DMACHINE="Motorola RCE" + +# RUN: yaml2obj %s -o %t.arm.o -D MACHINE=EM_ARM +# RUN: llvm-readelf --file-headers %t.arm.o | FileCheck %s -DMACHINE="ARM" + +# RUN: yaml2obj %s -o %t.alpha.o -D MACHINE=EM_ALPHA +# RUN: llvm-readelf --file-headers %t.alpha.o | FileCheck %s -DMACHINE="EM_ALPHA" + +# RUN: yaml2obj %s -o %t.sh.o -D MACHINE=EM_SH +# RUN: llvm-readelf --file-headers %t.sh.o | FileCheck %s -DMACHINE="Hitachi SH" + +# RUN: yaml2obj %s -o %t.sparcv9.o -D MACHINE=EM_SPARCV9 +# RUN: llvm-readelf --file-headers %t.sparcv9.o | FileCheck %s -DMACHINE="Sparc v9" + +# RUN: yaml2obj %s -o %t.tricore.o -D MACHINE=EM_TRICORE +# RUN: llvm-readelf --file-headers %t.tricore.o | FileCheck %s -DMACHINE="Siemens Tricore" + +# RUN: yaml2obj %s -o %t.arc.o -D MACHINE=EM_ARC +# RUN: llvm-readelf --file-headers %t.arc.o 
| FileCheck %s -DMACHINE="ARC" + +# RUN: yaml2obj %s -o %t.h8_300.o -D MACHINE=EM_H8_300 +# RUN: llvm-readelf --file-headers %t.h8_300.o | FileCheck %s -DMACHINE="Hitachi H8/300" + +# RUN: yaml2obj %s -o %t.h8_300h.o -D MACHINE=EM_H8_300H +# RUN: llvm-readelf --file-headers %t.h8_300h.o | FileCheck %s -DMACHINE="Hitachi H8/300H" + +# RUN: yaml2obj %s -o %t.h8s.o -D MACHINE=EM_H8S +# RUN: llvm-readelf --file-headers %t.h8s.o | FileCheck %s -DMACHINE="Hitachi H8S" + +# RUN: yaml2obj %s -o %t.h8_500.o -D MACHINE=EM_H8_500 +# RUN: llvm-readelf --file-headers %t.h8_500.o | FileCheck %s -DMACHINE="Hitachi H8/500" + +# RUN: yaml2obj %s -o %t.ia_64.o -D MACHINE=EM_IA_64 +# RUN: llvm-readelf --file-headers %t.ia_64.o | FileCheck %s -DMACHINE="Intel IA-64" + +# RUN: yaml2obj %s -o %t.mips_x.o -D MACHINE=EM_MIPS_X +# RUN: llvm-readelf --file-headers %t.mips_x.o | FileCheck %s -DMACHINE="Stanford MIPS-X" + +# RUN: yaml2obj %s -o %t.coldfire.o -D MACHINE=EM_COLDFIRE +# RUN: llvm-readelf --file-headers %t.coldfire.o | FileCheck %s -DMACHINE="Motorola Coldfire" + +# RUN: yaml2obj %s -o %t.68hc12.o -D MACHINE=EM_68HC12 +# RUN: llvm-readelf --file-headers %t.68hc12.o | FileCheck %s -DMACHINE="Motorola MC68HC12 Microcontroller" + +# RUN: yaml2obj %s -o %t.mma.o -D MACHINE=EM_MMA +# RUN: llvm-readelf --file-headers %t.mma.o | FileCheck %s -DMACHINE="Fujitsu Multimedia Accelerator" + +# RUN: yaml2obj %s -o %t.pcp.o -D MACHINE=EM_PCP +# RUN: llvm-readelf --file-headers %t.pcp.o | FileCheck %s -DMACHINE="Siemens PCP" + +# RUN: yaml2obj %s -o %t.ncpu.o -D MACHINE=EM_NCPU +# RUN: llvm-readelf --file-headers %t.ncpu.o | FileCheck %s -DMACHINE="Sony nCPU embedded RISC processor" + +# RUN: yaml2obj %s -o %t.ndri.o -D MACHINE=EM_NDR1 +# RUN: llvm-readelf --file-headers %t.ndri.o | FileCheck %s -DMACHINE="Denso NDR1 microprocesspr" + +# RUN: yaml2obj %s -o %t.starcore.o -D MACHINE=EM_STARCORE +# RUN: llvm-readelf --file-headers %t.starcore.o | FileCheck %s -DMACHINE="Motorola Star*Core 
processor" + +# RUN: yaml2obj %s -o %t.me16.o -D MACHINE=EM_ME16 +# RUN: llvm-readelf --file-headers %t.me16.o | FileCheck %s -DMACHINE="Toyota ME16 processor" + +# RUN: yaml2obj %s -o %t.st100.o -D MACHINE=EM_ST100 +# RUN: llvm-readelf --file-headers %t.st100.o | FileCheck %s -DMACHINE="STMicroelectronics ST100 processor" + +# RUN: yaml2obj %s -o %t.tinyj.o -D MACHINE=EM_TINYJ +# RUN: llvm-readelf --file-headers %t.tinyj.o | FileCheck %s -DMACHINE="Advanced Logic Corp. TinyJ embedded processor" + +# RUN: yaml2obj %s -o %t.x86_64.o -D MACHINE=EM_X86_64 +# RUN: llvm-readelf --file-headers %t.x86_64.o | FileCheck %s -DMACHINE="Advanced Micro Devices X86-64" + +# RUN: yaml2obj %s -o %t.pdsp.o -D MACHINE=EM_PDSP +# RUN: llvm-readelf --file-headers %t.pdsp.o | FileCheck %s -DMACHINE="Sony DSP processor" + +# RUN: yaml2obj %s -o %t.pdp10.o -D MACHINE=EM_PDP10 +# RUN: llvm-readelf --file-headers %t.pdp10.o | FileCheck %s -DMACHINE="Digital Equipment Corp. PDP-10" + +# RUN: yaml2obj %s -o %t.pdp11.o -D MACHINE=EM_PDP11 +# RUN: llvm-readelf --file-headers %t.pdp11.o | FileCheck %s -DMACHINE="Digital Equipment Corp. 
PDP-11" + +# RUN: yaml2obj %s -o %t.fx66.o -D MACHINE=EM_FX66 +# RUN: llvm-readelf --file-headers %t.fx66.o | FileCheck %s -DMACHINE="Siemens FX66 microcontroller" + +# RUN: yaml2obj %s -o %t.st9plus.o -D MACHINE=EM_ST9PLUS +# RUN: llvm-readelf --file-headers %t.st9plus.o | FileCheck %s -DMACHINE="STMicroelectronics ST9+ 8/16 bit microcontroller" + +# RUN: yaml2obj %s -o %t.st7.o -D MACHINE=EM_ST7 +# RUN: llvm-readelf --file-headers %t.st7.o | FileCheck %s -DMACHINE="STMicroelectronics ST7 8-bit microcontroller" + +# RUN: yaml2obj %s -o %t.68hc16.o -D MACHINE=EM_68HC16 +# RUN: llvm-readelf --file-headers %t.68hc16.o | FileCheck %s -DMACHINE="Motorola MC68HC16 Microcontroller" + +# RUN: yaml2obj %s -o %t.68hc11.o -D MACHINE=EM_68HC11 +# RUN: llvm-readelf --file-headers %t.68hc11.o | FileCheck %s -DMACHINE="Motorola MC68HC11 Microcontroller" + +# RUN: yaml2obj %s -o %t.68hc08.o -D MACHINE=EM_68HC08 +# RUN: llvm-readelf --file-headers %t.68hc08.o | FileCheck %s -DMACHINE="Motorola MC68HC08 Microcontroller" + +# RUN: yaml2obj %s -o %t.68hc05.o -D MACHINE=EM_68HC05 +# RUN: llvm-readelf --file-headers %t.68hc05.o | FileCheck %s -DMACHINE="Motorola MC68HC05 Microcontroller" + +# RUN: yaml2obj %s -o %t.svx.o -D MACHINE=EM_SVX +# RUN: llvm-readelf --file-headers %t.svx.o | FileCheck %s -DMACHINE="Silicon Graphics SVx" + +# RUN: yaml2obj %s -o %t.st19.o -D MACHINE=EM_ST19 +# RUN: llvm-readelf --file-headers %t.st19.o | FileCheck %s -DMACHINE="STMicroelectronics ST19 8-bit microcontroller" + +# RUN: yaml2obj %s -o %t.vax.o -D MACHINE=EM_VAX +# RUN: llvm-readelf --file-headers %t.vax.o | FileCheck %s -DMACHINE="Digital VAX" + +# RUN: yaml2obj %s -o %t.cris.o -D MACHINE=EM_CRIS +# RUN: llvm-readelf --file-headers %t.cris.o | FileCheck %s -DMACHINE="Axis Communications 32-bit embedded processor" + +# RUN: yaml2obj %s -o %t.javelin.o -D MACHINE=EM_JAVELIN +# RUN: llvm-readelf --file-headers %t.javelin.o | FileCheck %s -DMACHINE="Infineon Technologies 32-bit embedded cpu" + +# 
RUN: yaml2obj %s -o %t.firepath.o -D MACHINE=EM_FIREPATH +# RUN: llvm-readelf --file-headers %t.firepath.o | FileCheck %s -DMACHINE="Element 14 64-bit DSP processor" + +# RUN: yaml2obj %s -o %t.zsp.o -D MACHINE=EM_ZSP +# RUN: llvm-readelf --file-headers %t.zsp.o | FileCheck %s -DMACHINE="LSI Logic's 16-bit DSP processor" + +# RUN: yaml2obj %s -o %t.mmix.o -D MACHINE=EM_MMIX +# RUN: llvm-readelf --file-headers %t.mmix.o | FileCheck %s -DMACHINE="Donald Knuth's educational 64-bit processor" + +# RUN: yaml2obj %s -o %t.huany.o -D MACHINE=EM_HUANY +# RUN: llvm-readelf --file-headers %t.huany.o | FileCheck %s -DMACHINE="Harvard Universitys's machine-independent object format" + +# RUN: yaml2obj %s -o %t.prism.o -D MACHINE=EM_PRISM +# RUN: llvm-readelf --file-headers %t.prism.o | FileCheck %s -DMACHINE="Vitesse Prism" + +# RUN: yaml2obj %s -o %t.avr.o -D MACHINE=EM_AVR +# RUN: llvm-readelf --file-headers %t.avr.o | FileCheck %s -DMACHINE="Atmel AVR 8-bit microcontroller" + +# RUN: yaml2obj %s -o %t.fr30.o -D MACHINE=EM_FR30 +# RUN: llvm-readelf --file-headers %t.fr30.o | FileCheck %s -DMACHINE="Fujitsu FR30" + +# RUN: yaml2obj %s -o %t.d10v.o -D MACHINE=EM_D10V +# RUN: llvm-readelf --file-headers %t.d10v.o | FileCheck %s -DMACHINE="Mitsubishi D10V" + +# RUN: yaml2obj %s -o %t.d30v.o -D MACHINE=EM_D30V +# RUN: llvm-readelf --file-headers %t.d30v.o | FileCheck %s -DMACHINE="Mitsubishi D30V" + +# RUN: yaml2obj %s -o %t.v850.o -D MACHINE=EM_V850 +# RUN: llvm-readelf --file-headers %t.v850.o | FileCheck %s -DMACHINE="NEC v850" + +# RUN: yaml2obj %s -o %t.m32r.o -D MACHINE=EM_M32R +# RUN: llvm-readelf --file-headers %t.m32r.o | FileCheck %s -DMACHINE="Renesas M32R (formerly Mitsubishi M32r)" + +# RUN: yaml2obj %s -o %t.mn10300.o -D MACHINE=EM_MN10300 +# RUN: llvm-readelf --file-headers %t.mn10300.o | FileCheck %s -DMACHINE="Matsushita MN10300" + +# RUN: yaml2obj %s -o %t.mn10200.o -D MACHINE=EM_MN10200 +# RUN: llvm-readelf --file-headers %t.mn10200.o | FileCheck %s 
-DMACHINE="Matsushita MN10200" + +# RUN: yaml2obj %s -o %t.pj.o -D MACHINE=EM_PJ +# RUN: llvm-readelf --file-headers %t.pj.o | FileCheck %s -DMACHINE="picoJava" + +# RUN: yaml2obj %s -o %t.openrisc.o -D MACHINE=EM_OPENRISC +# RUN: llvm-readelf --file-headers %t.openrisc.o | FileCheck %s -DMACHINE="OpenRISC 32-bit embedded processor" + +# RUN: yaml2obj %s -o %t.arc_compact.o -D MACHINE=EM_ARC_COMPACT +# RUN: llvm-readelf --file-headers %t.arc_compact.o | FileCheck %s -DMACHINE="EM_ARC_COMPACT" + +# RUN: yaml2obj %s -o %t.xtensa.o -D MACHINE=EM_XTENSA +# RUN: llvm-readelf --file-headers %t.xtensa.o | FileCheck %s -DMACHINE="Tensilica Xtensa Processor" + +# RUN: yaml2obj %s -o %t.videocore.o -D MACHINE=EM_VIDEOCORE +# RUN: llvm-readelf --file-headers %t.videocore.o | FileCheck %s -DMACHINE="Alphamosaic VideoCore processor" + +# RUN: yaml2obj %s -o %t.tmm_gpp.o -D MACHINE=EM_TMM_GPP +# RUN: llvm-readelf --file-headers %t.tmm_gpp.o | FileCheck %s -DMACHINE="Thompson Multimedia General Purpose Processor" + +# RUN: yaml2obj %s -o %t.ns32k.o -D MACHINE=EM_NS32K +# RUN: llvm-readelf --file-headers %t.ns32k.o | FileCheck %s -DMACHINE="National Semiconductor 32000 series" + +# RUN: yaml2obj %s -o %t.tpc.o -D MACHINE=EM_TPC +# RUN: llvm-readelf --file-headers %t.tpc.o | FileCheck %s -DMACHINE="Tenor Network TPC processor" + +# RUN: yaml2obj %s -o %t.snp1k.o -D MACHINE=EM_SNP1K +# RUN: llvm-readelf --file-headers %t.snp1k.o | FileCheck %s -DMACHINE="EM_SNP1K" + +# RUN: yaml2obj %s -o %t.st200.o -D MACHINE=EM_ST200 +# RUN: llvm-readelf --file-headers %t.st200.o | FileCheck %s -DMACHINE="STMicroelectronics ST200 microcontroller" + +# RUN: yaml2obj %s -o %t.ip2k.o -D MACHINE=EM_IP2K +# RUN: llvm-readelf --file-headers %t.ip2k.o | FileCheck %s -DMACHINE="Ubicom IP2xxx 8-bit microcontrollers" + +# RUN: yaml2obj %s -o %t.max.o -D MACHINE=EM_MAX +# RUN: llvm-readelf --file-headers %t.max.o | FileCheck %s -DMACHINE="MAX Processor" + +# RUN: yaml2obj %s -o %t.cr.o -D MACHINE=EM_CR +# 
RUN: llvm-readelf --file-headers %t.cr.o | FileCheck %s -DMACHINE="National Semiconductor CompactRISC" + +# RUN: yaml2obj %s -o %t.f2mc16.o -D MACHINE=EM_F2MC16 +# RUN: llvm-readelf --file-headers %t.f2mc16.o | FileCheck %s -DMACHINE="Fujitsu F2MC16" + +# RUN: yaml2obj %s -o %t.msp430.o -D MACHINE=EM_MSP430 +# RUN: llvm-readelf --file-headers %t.msp430.o | FileCheck %s -DMACHINE="Texas Instruments msp430 microcontroller" + +# RUN: yaml2obj %s -o %t.blackfin.o -D MACHINE=EM_BLACKFIN +# RUN: llvm-readelf --file-headers %t.blackfin.o | FileCheck %s -DMACHINE="Analog Devices Blackfin" + +# RUN: yaml2obj %s -o %t.se_c33.o -D MACHINE=EM_SE_C33 +# RUN: llvm-readelf --file-headers %t.se_c33.o | FileCheck %s -DMACHINE="S1C33 Family of Seiko Epson processors" + +# RUN: yaml2obj %s -o %t.sep.o -D MACHINE=EM_SEP +# RUN: llvm-readelf --file-headers %t.sep.o | FileCheck %s -DMACHINE="Sharp embedded microprocessor" + +# RUN: yaml2obj %s -o %t.arca.o -D MACHINE=EM_ARCA +# RUN: llvm-readelf --file-headers %t.arca.o | FileCheck %s -DMACHINE="Arca RISC microprocessor" + +# RUN: yaml2obj %s -o %t.unicore.o -D MACHINE=EM_UNICORE +# RUN: llvm-readelf --file-headers %t.unicore.o | FileCheck %s -DMACHINE="Unicore" + +# RUN: yaml2obj %s -o %t.excess.o -D MACHINE=EM_EXCESS +# RUN: llvm-readelf --file-headers %t.excess.o | FileCheck %s -DMACHINE="eXcess 16/32/64-bit configurable embedded CPU" + +# RUN: yaml2obj %s -o %t.dxp.o -D MACHINE=EM_DXP +# RUN: llvm-readelf --file-headers %t.dxp.o | FileCheck %s -DMACHINE="Icera Semiconductor Inc. 
Deep Execution Processor" + +# RUN: yaml2obj %s -o %t.altera_nios2.o -D MACHINE=EM_ALTERA_NIOS2 +# RUN: llvm-readelf --file-headers %t.altera_nios2.o | FileCheck %s -DMACHINE="Altera Nios" + +# RUN: yaml2obj %s -o %t.crx.o -D MACHINE=EM_CRX +# RUN: llvm-readelf --file-headers %t.crx.o | FileCheck %s -DMACHINE="National Semiconductor CRX microprocessor" + +# RUN: yaml2obj %s -o %t.xgate.o -D MACHINE=EM_XGATE +# RUN: llvm-readelf --file-headers %t.xgate.o | FileCheck %s -DMACHINE="Motorola XGATE embedded processor" + +# RUN: yaml2obj %s -o %t.c166.o -D MACHINE=EM_C166 +# RUN: llvm-readelf --file-headers %t.c166.o | FileCheck %s -DMACHINE="Infineon Technologies xc16x" + +# RUN: yaml2obj %s -o %t.m16c.o -D MACHINE=EM_M16C +# RUN: llvm-readelf --file-headers %t.m16c.o | FileCheck %s -DMACHINE="Renesas M16C" + +# RUN: yaml2obj %s -o %t.dspic30f.o -D MACHINE=EM_DSPIC30F +# RUN: llvm-readelf --file-headers %t.dspic30f.o | FileCheck %s -DMACHINE="Microchip Technology dsPIC30F Digital Signal Controller" + +# RUN: yaml2obj %s -o %t.ce.o -D MACHINE=EM_CE +# RUN: llvm-readelf --file-headers %t.ce.o | FileCheck %s -DMACHINE="Freescale Communication Engine RISC core" + +# RUN: yaml2obj %s -o %t.m32c.o -D MACHINE=EM_M32C +# RUN: llvm-readelf --file-headers %t.m32c.o | FileCheck %s -DMACHINE="Renesas M32C" + +# RUN: yaml2obj %s -o %t.tsk3000.o -D MACHINE=EM_TSK3000 +# RUN: llvm-readelf --file-headers %t.tsk3000.o | FileCheck %s -DMACHINE="Altium TSK3000 core" + +# RUN: yaml2obj %s -o %t.rs08.o -D MACHINE=EM_RS08 +# RUN: llvm-readelf --file-headers %t.rs08.o | FileCheck %s -DMACHINE="Freescale RS08 embedded processor" + +# RUN: yaml2obj %s -o %t.sharc.o -D MACHINE=EM_SHARC +# RUN: llvm-readelf --file-headers %t.sharc.o | FileCheck %s -DMACHINE="EM_SHARC" + +# RUN: yaml2obj %s -o %t.ecog2.o -D MACHINE=EM_ECOG2 +# RUN: llvm-readelf --file-headers %t.ecog2.o | FileCheck %s -DMACHINE="Cyan Technology eCOG2 microprocessor" + +# RUN: yaml2obj %s -o %t.score7.o -D MACHINE=EM_SCORE7 +# RUN: 
llvm-readelf --file-headers %t.score7.o | FileCheck %s -DMACHINE="SUNPLUS S+Core" + +# RUN: yaml2obj %s -o %t.dsp24.o -D MACHINE=EM_DSP24 +# RUN: llvm-readelf --file-headers %t.dsp24.o | FileCheck %s -DMACHINE="New Japan Radio (NJR) 24-bit DSP Processor" + +# RUN: yaml2obj %s -o %t.videocore3.o -D MACHINE=EM_VIDEOCORE3 +# RUN: llvm-readelf --file-headers %t.videocore3.o | FileCheck %s -DMACHINE="Broadcom VideoCore III processor" + +# RUN: yaml2obj %s -o %t.latticemico32.o -D MACHINE=EM_LATTICEMICO32 +# RUN: llvm-readelf --file-headers %t.latticemico32.o | FileCheck %s -DMACHINE="Lattice Mico32" + +# RUN: yaml2obj %s -o %t.se_c17.o -D MACHINE=EM_SE_C17 +# RUN: llvm-readelf --file-headers %t.se_c17.o | FileCheck %s -DMACHINE="Seiko Epson C17 family" + +# RUN: yaml2obj %s -o %t.ti_c6000.o -D MACHINE=EM_TI_C6000 +# RUN: llvm-readelf --file-headers %t.ti_c6000.o | FileCheck %s -DMACHINE="Texas Instruments TMS320C6000 DSP family" + +# RUN: yaml2obj %s -o %t.ti_c2000.o -D MACHINE=EM_TI_C2000 +# RUN: llvm-readelf --file-headers %t.ti_c2000.o | FileCheck %s -DMACHINE="Texas Instruments TMS320C2000 DSP family" + +# RUN: yaml2obj %s -o %t.ti_c5500.o -D MACHINE=EM_TI_C5500 +# RUN: llvm-readelf --file-headers %t.ti_c5500.o | FileCheck %s -DMACHINE="Texas Instruments TMS320C55x DSP family" + +# RUN: yaml2obj %s -o %t.mmdsp_plus.o -D MACHINE=EM_MMDSP_PLUS +# RUN: llvm-readelf --file-headers %t.mmdsp_plus.o | FileCheck %s -DMACHINE="STMicroelectronics 64bit VLIW Data Signal Processor" + +# RUN: yaml2obj %s -o %t.cypress_m8c.o -D MACHINE=EM_CYPRESS_M8C +# RUN: llvm-readelf --file-headers %t.cypress_m8c.o | FileCheck %s -DMACHINE="Cypress M8C microprocessor" + +# RUN: yaml2obj %s -o %t.r32c.o -D MACHINE=EM_R32C +# RUN: llvm-readelf --file-headers %t.r32c.o | FileCheck %s -DMACHINE="Renesas R32C series microprocessors" + +# RUN: yaml2obj %s -o %t.trimedia.o -D MACHINE=EM_TRIMEDIA +# RUN: llvm-readelf --file-headers %t.trimedia.o | FileCheck %s -DMACHINE="NXP Semiconductors TriMedia 
architecture family" + +# RUN: yaml2obj %s -o %t.hexagon.o -D MACHINE=EM_HEXAGON +# RUN: llvm-readelf --file-headers %t.hexagon.o | FileCheck %s -DMACHINE="Qualcomm Hexagon" + +# RUN: yaml2obj %s -o %t.8051.o -D MACHINE=EM_8051 +# RUN: llvm-readelf --file-headers %t.8051.o | FileCheck %s -DMACHINE="Intel 8051 and variants" + +# RUN: yaml2obj %s -o %t.stxp7x.o -D MACHINE=EM_STXP7X +# RUN: llvm-readelf --file-headers %t.stxp7x.o | FileCheck %s -DMACHINE="STMicroelectronics STxP7x family" + +# RUN: yaml2obj %s -o %t.nds32.o -D MACHINE=EM_NDS32 +# RUN: llvm-readelf --file-headers %t.nds32.o | FileCheck %s -DMACHINE="Andes Technology compact code size embedded RISC processor family" + +# RUN: yaml2obj %s -o %t.ecog1.o -D MACHINE=EM_ECOG1 +# RUN: llvm-readelf --file-headers %t.ecog1.o | FileCheck %s -DMACHINE="Cyan Technology eCOG1 microprocessor" + +# RUN: yaml2obj %s -o %t.maxq30.o -D MACHINE=EM_MAXQ30 +# RUN: llvm-readelf --file-headers %t.maxq30.o | FileCheck %s -DMACHINE="Dallas Semiconductor MAXQ30 Core microcontrollers" + +# RUN: yaml2obj %s -o %t.ximo16.o -D MACHINE=EM_XIMO16 +# RUN: llvm-readelf --file-headers %t.ximo16.o | FileCheck %s -DMACHINE="New Japan Radio (NJR) 16-bit DSP Processor" + +# RUN: yaml2obj %s -o %t.manik.o -D MACHINE=EM_MANIK +# RUN: llvm-readelf --file-headers %t.manik.o | FileCheck %s -DMACHINE="M2000 Reconfigurable RISC Microprocessor" + +# RUN: yaml2obj %s -o %t.craynv2.o -D MACHINE=EM_CRAYNV2 +# RUN: llvm-readelf --file-headers %t.craynv2.o | FileCheck %s -DMACHINE="Cray Inc. 
NV2 vector architecture" + +# RUN: yaml2obj %s -o %t.rx.o -D MACHINE=EM_RX +# RUN: llvm-readelf --file-headers %t.rx.o | FileCheck %s -DMACHINE="Renesas RX" + +# RUN: yaml2obj %s -o %t.metag.o -D MACHINE=EM_METAG +# RUN: llvm-readelf --file-headers %t.metag.o | FileCheck %s -DMACHINE="Imagination Technologies Meta processor architecture" + +# RUN: yaml2obj %s -o %t.mcst_elbrus.o -D MACHINE=EM_MCST_ELBRUS +# RUN: llvm-readelf --file-headers %t.mcst_elbrus.o | FileCheck %s -DMACHINE="MCST Elbrus general purpose hardware architecture" + +# RUN: yaml2obj %s -o %t.ecog16.o -D MACHINE=EM_ECOG16 +# RUN: llvm-readelf --file-headers %t.ecog16.o | FileCheck %s -DMACHINE="Cyan Technology eCOG16 family" + +# RUN: yaml2obj %s -o %t.cr16.o -D MACHINE=EM_CR16 +# RUN: llvm-readelf --file-headers %t.cr16.o | FileCheck %s -DMACHINE="Xilinx MicroBlaze" + +# RUN: yaml2obj %s -o %t.etpu.o -D MACHINE=EM_ETPU +# RUN: llvm-readelf --file-headers %t.etpu.o | FileCheck %s -DMACHINE="Freescale Extended Time Processing Unit" + +# RUN: yaml2obj %s -o %t.sle9x.o -D MACHINE=EM_SLE9X +# RUN: llvm-readelf --file-headers %t.sle9x.o | FileCheck %s -DMACHINE="Infineon Technologies SLE9X core" + +# RUN: yaml2obj %s -o %t.l10m.o -D MACHINE=EM_L10M +# RUN: llvm-readelf --file-headers %t.l10m.o | FileCheck %s -DMACHINE="EM_L10M" + +# RUN: yaml2obj %s -o %t.k10m.o -D MACHINE=EM_K10M +# RUN: llvm-readelf --file-headers %t.k10m.o | FileCheck %s -DMACHINE="EM_K10M" + +# RUN: yaml2obj %s -o %t.aarch64.o -D MACHINE=EM_AARCH64 +# RUN: llvm-readelf --file-headers %t.aarch64.o | FileCheck %s -DMACHINE="AArch64" + +# RUN: yaml2obj %s -o %t.avr32.o -D MACHINE=EM_AVR32 +# RUN: llvm-readelf --file-headers %t.avr32.o | FileCheck %s -DMACHINE="Atmel Corporation 32-bit microprocessor family" + +# RUN: yaml2obj %s -o %t.stm8.o -D MACHINE=EM_STM8 +# RUN: llvm-readelf --file-headers %t.stm8.o | FileCheck %s -DMACHINE="STMicroeletronics STM8 8-bit microcontroller" + +# RUN: yaml2obj %s -o %t.tile64.o -D MACHINE=EM_TILE64 +# 
RUN: llvm-readelf --file-headers %t.tile64.o | FileCheck %s -DMACHINE="Tilera TILE64 multicore architecture family" + +# RUN: yaml2obj %s -o %t.tilepro.o -D MACHINE=EM_TILEPRO +# RUN: llvm-readelf --file-headers %t.tilepro.o | FileCheck %s -DMACHINE="Tilera TILEPro multicore architecture family" + +# RUN: yaml2obj %s -o %t.cuda.o -D MACHINE=EM_CUDA +# RUN: llvm-readelf --file-headers %t.cuda.o | FileCheck %s -DMACHINE="NVIDIA CUDA architecture" + +# RUN: yaml2obj %s -o %t.tilegx.o -D MACHINE=EM_TILEGX +# RUN: llvm-readelf --file-headers %t.tilegx.o | FileCheck %s -DMACHINE="Tilera TILE-Gx multicore architecture family" + +# RUN: yaml2obj %s -o %t.cloudshield.o -D MACHINE=EM_CLOUDSHIELD +# RUN: llvm-readelf --file-headers %t.cloudshield.o | FileCheck %s -DMACHINE="EM_CLOUDSHIELD" + +# RUN: yaml2obj %s -o %t.corea_1st.o -D MACHINE=EM_COREA_1ST +# RUN: llvm-readelf --file-headers %t.corea_1st.o | FileCheck %s -DMACHINE="EM_COREA_1ST" + +# RUN: yaml2obj %s -o %t.corea_2nd.o -D MACHINE=EM_COREA_2ND +# RUN: llvm-readelf --file-headers %t.corea_2nd.o | FileCheck %s -DMACHINE="EM_COREA_2ND" + +# RUN: yaml2obj %s -o %t.arc_compact2.o -D MACHINE=EM_ARC_COMPACT2 +# RUN: llvm-readelf --file-headers %t.arc_compact2.o | FileCheck %s -DMACHINE="EM_ARC_COMPACT2" + +# RUN: yaml2obj %s -o %t.open8.o -D MACHINE=EM_OPEN8 +# RUN: llvm-readelf --file-headers %t.open8.o | FileCheck %s -DMACHINE="EM_OPEN8" + +# RUN: yaml2obj %s -o %t.rl78.o -D MACHINE=EM_RL78 +# RUN: llvm-readelf --file-headers %t.rl78.o | FileCheck %s -DMACHINE="Renesas RL78" + +# RUN: yaml2obj %s -o %t.videocore5.o -D MACHINE=EM_VIDEOCORE5 +# RUN: llvm-readelf --file-headers %t.videocore5.o | FileCheck %s -DMACHINE="Broadcom VideoCore V processor" + +# RUN: yaml2obj %s -o %t.78kor.o -D MACHINE=EM_78KOR +# RUN: llvm-readelf --file-headers %t.78kor.o | FileCheck %s -DMACHINE="EM_78KOR" + +# RUN: yaml2obj %s -o %t.56800ex.o -D MACHINE=EM_56800EX +# RUN: llvm-readelf --file-headers %t.56800ex.o | FileCheck %s 
-DMACHINE="EM_56800EX" + +# RUN: yaml2obj %s -o %t.amdgpu.o -D MACHINE=EM_AMDGPU +# RUN: llvm-readelf --file-headers %t.amdgpu.o | FileCheck %s -DMACHINE="EM_AMDGPU" + +# RUN: yaml2obj %s -o %t.riscv.o -D MACHINE=EM_RISCV +# RUN: llvm-readelf --file-headers %t.riscv.o | FileCheck %s -DMACHINE="RISC-V" + +# RUN: yaml2obj %s -o %t.lanai.o -D MACHINE=EM_LANAI +# RUN: llvm-readelf --file-headers %t.lanai.o | FileCheck %s -DMACHINE="EM_LANAI" + +# RUN: yaml2obj %s -o %t.bpf.o -D MACHINE=EM_BPF +# RUN: llvm-readelf --file-headers %t.bpf.o | FileCheck %s -DMACHINE="EM_BPF" + +# RUN: yaml2obj %s -o %t.ve.o -D MACHINE=EM_VE +# RUN: llvm-readelf --file-headers %t.ve.o | FileCheck %s -DMACHINE="NEC SX-Aurora Vector Engine" + +# CHECK: Machine: [[MACHINE]] + +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_REL + Machine: [[MACHINE]] diff --git a/llvm/test/tools/yaml2obj/ELF/section-headers.yaml b/llvm/test/tools/yaml2obj/ELF/section-headers.yaml new file mode 100644 index 00000000000000..ee0049d4d31d51 --- /dev/null +++ b/llvm/test/tools/yaml2obj/ELF/section-headers.yaml @@ -0,0 +1,184 @@ +## Check we can use "SectionHeaderTable" tag to reorder section header entries. + +## This is a general test that has sections with unique prefixes, a fill and a +## section without the unique prefix. The section header table describes sections +## in the same order they are listed in the YAML. 
+# RUN: yaml2obj %s --docnum=1 -o %t1 -DSEC1=".section (1)" -DSEC2=".section (2)" -DSEC3=".section.foo" +# RUN: llvm-readelf --section-headers %t1 | FileCheck %s --check-prefix=NO-OP + +# NO-OP: Section Headers: +# NO-OP-NEXT: [Nr] Name Type Address Off Size ES Flg Lk Inf Al +# NO-OP-NEXT: [ 0] NULL 0000000000000000 000000 000000 00 0 0 0 +# NO-OP-NEXT: [ 1] .section PROGBITS 0000000000000000 000040 000010 00 0 0 0 +# NO-OP-NEXT: [ 2] .section PROGBITS 0000000000000000 000050 000020 00 0 0 0 +# NO-OP-NEXT: [ 3] .section.foo PROGBITS 0000000000000000 0000a0 000040 00 0 0 0 +# NO-OP-NEXT: [ 4] .strtab STRTAB 0000000000000000 0000e0 000001 00 0 0 1 +# NO-OP-NEXT: [ 5] .shstrtab STRTAB 0000000000000000 0000e1 000029 00 0 0 1 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .section (1) + Type: SHT_PROGBITS + Size: 0x10 + - Name: .section (2) + Type: SHT_PROGBITS + Size: 0x20 + - Type: Fill + Name: .filler + Size: 0x30 + Pattern: "" + - Name: .section.foo + Type: SHT_PROGBITS + Size: 0x40 +SectionHeaderTable: + Sections: + - Name: [[SEC1]] + - Name: [[SEC2]] + - Name: [[SEC3]] + - Name: .strtab + - Name: .shstrtab + +## Show we are able to reorder sections. 
+# RUN: yaml2obj %s -o %t2 -DSEC3=".section (1)" -DSEC2=".section (2)" -DSEC1=".section.foo" +# RUN: llvm-readelf --section-headers %t2 | FileCheck %s --check-prefix=REORDERED + +# REORDERED: Section Headers: +# REORDERED-NEXT: [Nr] Name Type Address Off Size ES Flg Lk Inf Al +# REORDERED-NEXT: [ 0] NULL 0000000000000000 000000 000000 00 0 0 0 +# REORDERED-NEXT: [ 1] .section.foo PROGBITS 0000000000000000 0000a0 000040 00 0 0 0 +# REORDERED-NEXT: [ 2] .section PROGBITS 0000000000000000 000050 000020 00 0 0 0 +# REORDERED-NEXT: [ 3] .section PROGBITS 0000000000000000 000040 000010 00 0 0 0 +# REORDERED-NEXT: [ 4] .strtab STRTAB 0000000000000000 0000e0 000001 00 0 0 1 +# REORDERED-NEXT: [ 5] .shstrtab STRTAB 0000000000000000 0000e1 000029 00 0 0 1 + +## Show we report proper errors when the section header description: +## a) contains a repeated section name. +## b) omits any section that exists. +## c) contains a non-existent section. +# RUN: not yaml2obj %s -o /dev/null -DSEC1=".section.foo" -DSEC2="unknown" -DSEC3=".section.foo" 2>&1 | \ +# RUN: FileCheck %s --check-prefix=ERR1 +# d) contains a repeated implicit section name. +# e) contains a fill name. 
+# RUN: not yaml2obj %s -o /dev/null -DSEC1=".strtab" -DSEC2=".shstrtab" -DSEC3=".filler" 2>&1 | \ +# RUN: FileCheck %s --check-prefix=ERR2 + +# ERR1: error: repeated section name: '.section.foo' in the section header description +# ERR1-NEXT: error: section '.section (1)' should be present in the 'Sections' list +# ERR1-NEXT: error: section '.section (2)' should be present in the 'Sections' list +# ERR1-NEXT: error: section header contains undefined section 'unknown' + +# ERR2: error: repeated section name: '.strtab' in the section header description +# ERR2-NEXT: error: repeated section name: '.shstrtab' in the section header description +# ERR2-NEXT: error: section '.section (1)' should be present in the 'Sections' list +# ERR2-NEXT: error: section '.section (2)' should be present in the 'Sections' list +# ERR2-NEXT: error: section '.section.foo' should be present in the 'Sections' list +# ERR2-NEXT: error: section header contains undefined section '.filler' + +## Test that we are able to specify an empty sections list for +## the "SectionHeaderTable" tag to produce no section header. +# RUN: yaml2obj %s --docnum=2 -o %t3 +# RUN: llvm-readelf --file-headers %t3 | FileCheck %s --check-prefix=NO-HEADERS + +# NO-HEADERS: Start of section headers: 0 (bytes into file) +# NO-HEADERS: Size of section headers: 64 (bytes) +# NO-HEADERS: Number of section headers: 0 +# NO-HEADERS: Section header string table index: 0 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .foo + Type: SHT_PROGBITS +SectionHeaderTable: + Sections: [] + +## Test that we are still able to override e_shoff, e_shnum and e_shstrndx +## fields even when we do not produce section headers. 
+# RUN: yaml2obj %s --docnum=3 -o %t4 +# RUN: llvm-readelf --file-headers %t4 | FileCheck %s --check-prefix=NO-HEADERS-OVERRIDE + +# NO-HEADERS-OVERRIDE: Start of section headers: 2 (bytes into file) +# NO-HEADERS-OVERRIDE: Number of section headers: 3 +# NO-HEADERS-OVERRIDE: Section header string table index: 4 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 + SHOff: 0x2 + SHNum: 0x3 + SHStrNdx: 0x4 +Sections: + - Name: .foo + Type: SHT_PROGBITS +SectionHeaderTable: + Sections: [] + +## Check that section indices are updated properly in other places when we +## reorder sections in the section header table. +# RUN: yaml2obj %s --docnum=4 -o %t5 -DSEC1=".foo" -DSEC2=".bar" +# RUN: llvm-readelf --section-headers --symbols %t5 | FileCheck %s --check-prefix=INDICES-A +# RUN: yaml2obj %s --docnum=4 -o %t6 -DSEC2=".foo" -DSEC1=".bar" +# RUN: llvm-readelf --section-headers --symbols %t6 | FileCheck %s --check-prefix=INDICES-B + +# INDICES-A: [Nr] Name Type Address Off Size ES Flg Lk +# INDICES-A: [ 1] .foo PROGBITS 0000000000000000 000040 000000 00 0 +# INDICES-A-NEXT: [ 2] .bar PROGBITS 0000000000000000 000040 000000 00 0 +# INDICES-A-NEXT: [ 3] .another.1 PROGBITS 0000000000000000 000040 000000 00 1 +# INDICES-A-NEXT: [ 4] .another.2 PROGBITS 0000000000000000 000040 000000 00 2 + +# INDICES-A: Num: Value Size Type Bind Vis Ndx Name +# INDICES-A: 1: 0000000000000000 0 NOTYPE LOCAL DEFAULT 1 foo +# INDICES-A-NEXT: 2: 0000000000000000 0 NOTYPE LOCAL DEFAULT 2 bar + +# INDICES-B: [ 1] .bar PROGBITS 0000000000000000 000040 000000 00 0 +# INDICES-B-NEXT: [ 2] .foo PROGBITS 0000000000000000 000040 000000 00 0 +# INDICES-B-NEXT: [ 3] .another.1 PROGBITS 0000000000000000 000040 000000 00 2 +# INDICES-B-NEXT: [ 4] .another.2 PROGBITS 0000000000000000 000040 000000 00 1 + +# INDICES-B: Num: Value Size Type Bind Vis Ndx Name +# INDICES-B: 1: 0000000000000000 0 NOTYPE LOCAL DEFAULT 2 foo +# INDICES-B-NEXT: 2: 0000000000000000 0 
NOTYPE LOCAL DEFAULT 1 bar + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .foo + Type: SHT_PROGBITS + - Name: .bar + Type: SHT_PROGBITS + - Name: .another.1 + Link: .foo + Type: SHT_PROGBITS + - Name: .another.2 + Link: .bar + Type: SHT_PROGBITS +SectionHeaderTable: + Sections: + - Name: [[SEC1]] + - Name: [[SEC2]] + - Name: .another.1 + - Name: .another.2 + - Name: .symtab + - Name: .strtab + - Name: .shstrtab +Symbols: + - Name: foo + Section: .foo + - Name: bar + Section: .bar diff --git a/llvm/tools/llvm-cov/CoverageFilters.h b/llvm/tools/llvm-cov/CoverageFilters.h index ccaa7a9df5905a..33fd9929c59a26 100644 --- a/llvm/tools/llvm-cov/CoverageFilters.h +++ b/llvm/tools/llvm-cov/CoverageFilters.h @@ -23,7 +23,7 @@ class SpecialCaseList; namespace coverage { class CoverageMapping; struct FunctionRecord; -}; // namespace coverage +} // namespace coverage /// Matches specific functions that pass the requirement of this filter. 
class CoverageFilter { diff --git a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.h b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.h index 86a2f1b553a967..dc41298265d2a5 100644 --- a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.h +++ b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.h @@ -12,7 +12,6 @@ #include "llvm/ADT/Twine.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/Object/ObjectFile.h" -#include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" namespace llvm { diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 83132869cc2c26..84a68b17b298fe 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1435,6 +1435,8 @@ static const EnumEntry ElfMachineType[] = { ENUM_ENT(EM_STXP7X, "STMicroelectronics STxP7x family"), ENUM_ENT(EM_NDS32, "Andes Technology compact code size embedded RISC processor family"), ENUM_ENT(EM_ECOG1, "Cyan Technology eCOG1 microprocessor"), + // FIXME: Following EM_ECOG1X definitions is dead code since EM_ECOG1X has + // an identical number to EM_ECOG1. 
ENUM_ENT(EM_ECOG1X, "Cyan Technology eCOG1X family"), ENUM_ENT(EM_MAXQ30, "Dallas Semiconductor MAXQ30 Core microcontrollers"), ENUM_ENT(EM_XIMO16, "New Japan Radio (NJR) 16-bit DSP Processor"), @@ -1469,6 +1471,7 @@ static const EnumEntry ElfMachineType[] = { ENUM_ENT(EM_RISCV, "RISC-V"), ENUM_ENT(EM_LANAI, "EM_LANAI"), ENUM_ENT(EM_BPF, "EM_BPF"), + ENUM_ENT(EM_VE, "NEC SX-Aurora Vector Engine"), }; static const EnumEntry ElfSymbolBindings[] = { diff --git a/llvm/unittests/Analysis/CMakeLists.txt b/llvm/unittests/Analysis/CMakeLists.txt index f344d6c7bc25df..6cc14d124b1530 100644 --- a/llvm/unittests/Analysis/CMakeLists.txt +++ b/llvm/unittests/Analysis/CMakeLists.txt @@ -41,3 +41,5 @@ add_llvm_unittest(AnalysisTests ValueTrackingTest.cpp VectorUtilsTest.cpp ) + +add_subdirectory(ML) \ No newline at end of file diff --git a/llvm/unittests/Analysis/ML/CMakeLists.txt b/llvm/unittests/Analysis/ML/CMakeLists.txt new file mode 100644 index 00000000000000..8d1c90312ad0bf --- /dev/null +++ b/llvm/unittests/Analysis/ML/CMakeLists.txt @@ -0,0 +1,12 @@ +set(LLVM_LINK_COMPONENTS + Analysis + AsmParser + Core + MLPolicies + Support + TransformUtils + ) + +add_llvm_unittest(MLAnalysisTests + InlineFeaturesAnalysisTest.cpp + ) diff --git a/llvm/unittests/Analysis/ML/InlineFeaturesAnalysisTest.cpp b/llvm/unittests/Analysis/ML/InlineFeaturesAnalysisTest.cpp new file mode 100644 index 00000000000000..4dfc0bd153f710 --- /dev/null +++ b/llvm/unittests/Analysis/ML/InlineFeaturesAnalysisTest.cpp @@ -0,0 +1,77 @@ +//===- InlineFeaturesAnalysisTest.cpp - inline features unit tests --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ML/InlineFeaturesAnalysis.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/SourceMgr.h" +#include "gtest/gtest.h" + +using namespace llvm; + +static std::unique_ptr parseIR(LLVMContext &C, const char *IR) { + SMDiagnostic Err; + std::unique_ptr Mod = parseAssemblyString(IR, Err, C); + if (!Mod) + Err.print("MLAnalysisTests", errs()); + return Mod; +} + +TEST(InlineFeaturesTest, BasicTest) { + LLVMContext C; + std::unique_ptr M = parseIR(C, + R"IR( +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +declare i32 @f1(i32) +declare i32 @f2(i32) + +define i32 @branches(i32) { + %cond = icmp slt i32 %0, 3 + br i1 %cond, label %then, label %else + +then: + %ret.1 = call i32 @f1(i32 %0) + br label %last.block + +else: + %ret.2 = call i32 @f2(i32 %0) + br label %last.block + +last.block: + %ret = phi i32 [%ret.1, %then], [%ret.2, %else] + ret i32 %ret +} + +define internal i32 @top() { + %1 = call i32 @branches(i32 2) + %2 = call i32 @f1(i32 %1) + ret i32 %2 +} +)IR"); + + FunctionAnalysisManager FAM; + InlineFeaturesAnalysis FA; + + auto BranchesFeatures = FA.run(*M->getFunction("branches"), FAM); + EXPECT_EQ(BranchesFeatures.BasicBlockCount, 4); + EXPECT_EQ(BranchesFeatures.BlocksReachedFromConditionalInstruction, 2); + EXPECT_EQ(BranchesFeatures.DirectCallsToDefinedFunctions, 0); + // 2 Users: top is one. The other is added because @branches is not internal, + // so it may have external callers. 
+ EXPECT_EQ(BranchesFeatures.Uses, 2); + + auto TopFeatures = FA.run(*M->getFunction("top"), FAM); + EXPECT_EQ(TopFeatures.BasicBlockCount, 1); + EXPECT_EQ(TopFeatures.BlocksReachedFromConditionalInstruction, 0); + EXPECT_EQ(TopFeatures.DirectCallsToDefinedFunctions, 1); + EXPECT_EQ(TopFeatures.Uses, 0); +} diff --git a/llvm/unittests/IR/MetadataTest.cpp b/llvm/unittests/IR/MetadataTest.cpp index d785e469e728eb..038899ada96479 100644 --- a/llvm/unittests/IR/MetadataTest.cpp +++ b/llvm/unittests/IR/MetadataTest.cpp @@ -1139,11 +1139,12 @@ typedef MetadataTest DISubrangeTest; TEST_F(DISubrangeTest, get) { auto *N = DISubrange::get(Context, 5, 7); auto Count = N->getCount(); + auto Lower = N->getLowerBound(); EXPECT_EQ(dwarf::DW_TAG_subrange_type, N->getTag()); ASSERT_TRUE(Count); ASSERT_TRUE(Count.is()); EXPECT_EQ(5, Count.get()->getSExtValue()); - EXPECT_EQ(7, N->getLowerBound()); + EXPECT_EQ(7, Lower.get()->getSExtValue()); EXPECT_EQ(N, DISubrange::get(Context, 5, 7)); EXPECT_EQ(DISubrange::get(Context, 5, 0), DISubrange::get(Context, 5)); @@ -1154,11 +1155,12 @@ TEST_F(DISubrangeTest, get) { TEST_F(DISubrangeTest, getEmptyArray) { auto *N = DISubrange::get(Context, -1, 0); auto Count = N->getCount(); + auto Lower = N->getLowerBound(); EXPECT_EQ(dwarf::DW_TAG_subrange_type, N->getTag()); ASSERT_TRUE(Count); ASSERT_TRUE(Count.is()); EXPECT_EQ(-1, Count.get()->getSExtValue()); - EXPECT_EQ(0, N->getLowerBound()); + EXPECT_EQ(0, Lower.get()->getSExtValue()); EXPECT_EQ(N, DISubrange::get(Context, -1, 0)); } @@ -1172,15 +1174,146 @@ TEST_F(DISubrangeTest, getVariableCount) { auto *N = DISubrange::get(Context, VlaExpr, 0); auto Count = N->getCount(); + auto Lower = N->getLowerBound(); ASSERT_TRUE(Count); ASSERT_TRUE(Count.is()); EXPECT_EQ(VlaExpr, Count.get()); ASSERT_TRUE(isa(N->getRawCountNode())); - EXPECT_EQ(0, N->getLowerBound()); + EXPECT_EQ(0, Lower.get()->getSExtValue()); EXPECT_EQ("vla_expr", Count.get()->getName()); EXPECT_EQ(N, DISubrange::get(Context, VlaExpr, 
0)); } +TEST_F(DISubrangeTest, fortranAllocatableInt) { + DILocalScope *Scope = getSubprogram(); + DIFile *File = getFile(); + DIType *Type = getDerivedType(); + DINode::DIFlags Flags = static_cast(7); + auto *LI = ConstantAsMetadata::get( + ConstantInt::getSigned(Type::getInt64Ty(Context), -10)); + auto *UI = ConstantAsMetadata::get( + ConstantInt::getSigned(Type::getInt64Ty(Context), 10)); + auto *SI = ConstantAsMetadata::get( + ConstantInt::getSigned(Type::getInt64Ty(Context), 4)); + auto *UIother = ConstantAsMetadata::get( + ConstantInt::getSigned(Type::getInt64Ty(Context), 20)); + auto *UVother = DILocalVariable::get(Context, Scope, "ubother", File, 8, Type, + 2, Flags, 8); + auto *UEother = DIExpression::get(Context, {5, 6}); + auto *LIZero = ConstantAsMetadata::get( + ConstantInt::getSigned(Type::getInt64Ty(Context), 0)); + auto *UIZero = ConstantAsMetadata::get( + ConstantInt::getSigned(Type::getInt64Ty(Context), 0)); + + auto *N = DISubrange::get(Context, nullptr, LI, UI, SI); + + auto Lower = N->getLowerBound(); + ASSERT_TRUE(Lower); + ASSERT_TRUE(Lower.is()); + EXPECT_EQ(cast(LI->getValue()), Lower.get()); + + auto Upper = N->getUpperBound(); + ASSERT_TRUE(Upper); + ASSERT_TRUE(Upper.is()); + EXPECT_EQ(cast(UI->getValue()), Upper.get()); + + auto Stride = N->getStride(); + ASSERT_TRUE(Stride); + ASSERT_TRUE(Stride.is()); + EXPECT_EQ(cast(SI->getValue()), Stride.get()); + + EXPECT_EQ(N, DISubrange::get(Context, nullptr, LI, UI, SI)); + + EXPECT_NE(N, DISubrange::get(Context, nullptr, LI, UIother, SI)); + EXPECT_NE(N, DISubrange::get(Context, nullptr, LI, UEother, SI)); + EXPECT_NE(N, DISubrange::get(Context, nullptr, LI, UVother, SI)); + + auto *NZeroLower = DISubrange::get(Context, nullptr, LIZero, UI, SI); + EXPECT_NE(NZeroLower, DISubrange::get(Context, nullptr, nullptr, UI, SI)); + + auto *NZeroUpper = DISubrange::get(Context, nullptr, LI, UIZero, SI); + EXPECT_NE(NZeroUpper, DISubrange::get(Context, nullptr, LI, nullptr, SI)); +} + 
+TEST_F(DISubrangeTest, fortranAllocatableVar) { + DILocalScope *Scope = getSubprogram(); + DIFile *File = getFile(); + DIType *Type = getDerivedType(); + DINode::DIFlags Flags = static_cast(7); + auto *LV = + DILocalVariable::get(Context, Scope, "lb", File, 8, Type, 2, Flags, 8); + auto *UV = + DILocalVariable::get(Context, Scope, "ub", File, 8, Type, 2, Flags, 8); + auto *SV = + DILocalVariable::get(Context, Scope, "st", File, 8, Type, 2, Flags, 8); + auto *SVother = DILocalVariable::get(Context, Scope, "stother", File, 8, Type, + 2, Flags, 8); + auto *SIother = ConstantAsMetadata::get( + ConstantInt::getSigned(Type::getInt64Ty(Context), 20)); + auto *SEother = DIExpression::get(Context, {5, 6}); + + auto *N = DISubrange::get(Context, nullptr, LV, UV, SV); + + auto Lower = N->getLowerBound(); + ASSERT_TRUE(Lower); + ASSERT_TRUE(Lower.is()); + EXPECT_EQ(LV, Lower.get()); + + auto Upper = N->getUpperBound(); + ASSERT_TRUE(Upper); + ASSERT_TRUE(Upper.is()); + EXPECT_EQ(UV, Upper.get()); + + auto Stride = N->getStride(); + ASSERT_TRUE(Stride); + ASSERT_TRUE(Stride.is()); + EXPECT_EQ(SV, Stride.get()); + + EXPECT_EQ(N, DISubrange::get(Context, nullptr, LV, UV, SV)); + + EXPECT_NE(N, DISubrange::get(Context, nullptr, LV, UV, SVother)); + EXPECT_NE(N, DISubrange::get(Context, nullptr, LV, UV, SEother)); + EXPECT_NE(N, DISubrange::get(Context, nullptr, LV, UV, SIother)); +} + +TEST_F(DISubrangeTest, fortranAllocatableExpr) { + DILocalScope *Scope = getSubprogram(); + DIFile *File = getFile(); + DIType *Type = getDerivedType(); + DINode::DIFlags Flags = static_cast(7); + auto *LE = DIExpression::get(Context, {1, 2}); + auto *UE = DIExpression::get(Context, {2, 3}); + auto *SE = DIExpression::get(Context, {3, 4}); + auto *LEother = DIExpression::get(Context, {5, 6}); + auto *LIother = ConstantAsMetadata::get( + ConstantInt::getSigned(Type::getInt64Ty(Context), 20)); + auto *LVother = DILocalVariable::get(Context, Scope, "lbother", File, 8, Type, + 2, Flags, 8); + + auto *N 
= DISubrange::get(Context, nullptr, LE, UE, SE); + + auto Lower = N->getLowerBound(); + ASSERT_TRUE(Lower); + ASSERT_TRUE(Lower.is()); + EXPECT_EQ(LE, Lower.get()); + + auto Upper = N->getUpperBound(); + ASSERT_TRUE(Upper); + ASSERT_TRUE(Upper.is()); + EXPECT_EQ(UE, Upper.get()); + + auto Stride = N->getStride(); + ASSERT_TRUE(Stride); + ASSERT_TRUE(Stride.is()); + EXPECT_EQ(SE, Stride.get()); + + EXPECT_EQ(N, DISubrange::get(Context, nullptr, LE, UE, SE)); + + EXPECT_NE(N, DISubrange::get(Context, nullptr, LEother, UE, SE)); + EXPECT_NE(N, DISubrange::get(Context, nullptr, LIother, UE, SE)); + EXPECT_NE(N, DISubrange::get(Context, nullptr, LVother, UE, SE)); +} + typedef MetadataTest DIEnumeratorTest; TEST_F(DIEnumeratorTest, get) { diff --git a/llvm/unittests/MI/LiveIntervalTest.cpp b/llvm/unittests/MI/LiveIntervalTest.cpp index 6faa8abd4cd813..ea8476db1e656b 100644 --- a/llvm/unittests/MI/LiveIntervalTest.cpp +++ b/llvm/unittests/MI/LiveIntervalTest.cpp @@ -432,12 +432,12 @@ TEST(LiveIntervalTest, DeadSubRegMoveUp) { %54:vgpr_32 = V_MOV_B32_e32 1742342378, implicit $exec %57:vgpr_32 = V_MOV_B32_e32 3168768712, implicit $exec %59:vgpr_32 = V_MOV_B32_e32 1039972644, implicit $exec - %60:vgpr_32 = V_MAD_F32 0, %52, 0, undef %61:vgpr_32, 0, %59, 0, 0, implicit $exec - %63:vgpr_32 = V_ADD_F32_e32 %51.sub3, undef %64:vgpr_32, implicit $exec - dead %66:vgpr_32 = V_MAD_F32 0, %60, 0, undef %67:vgpr_32, 0, %125.sub2, 0, 0, implicit $exec - undef %124.sub1:vreg_128 = V_MAD_F32 0, %57, 0, undef %70:vgpr_32, 0, %125.sub1, 0, 0, implicit $exec - %124.sub0:vreg_128 = V_MAD_F32 0, %54, 0, undef %73:vgpr_32, 0, %125.sub0, 0, 0, implicit $exec - dead undef %125.sub3:vreg_128 = V_MAC_F32_e32 %63, undef %76:vgpr_32, %125.sub3, implicit $exec + %60:vgpr_32 = nofpexcept V_MAD_F32 0, %52, 0, undef %61:vgpr_32, 0, %59, 0, 0, implicit $mode, implicit $exec + %63:vgpr_32 = nofpexcept V_ADD_F32_e32 %51.sub3, undef %64:vgpr_32, implicit $mode, implicit $exec + dead %66:vgpr_32 = 
nofpexcept V_MAD_F32 0, %60, 0, undef %67:vgpr_32, 0, %125.sub2, 0, 0, implicit $mode, implicit $exec + undef %124.sub1:vreg_128 = nofpexcept V_MAD_F32 0, %57, 0, undef %70:vgpr_32, 0, %125.sub1, 0, 0, implicit $mode, implicit $exec + %124.sub0:vreg_128 = nofpexcept V_MAD_F32 0, %54, 0, undef %73:vgpr_32, 0, %125.sub0, 0, 0, implicit $mode, implicit $exec + dead undef %125.sub3:vreg_128 = nofpexcept V_MAC_F32_e32 %63, undef %76:vgpr_32, %125.sub3, implicit $mode, implicit $exec )MIR", [](MachineFunction &MF, LiveIntervals &LIS) { testHandleMove(MF, LIS, 15, 12); }); diff --git a/llvm/unittests/Object/CMakeLists.txt b/llvm/unittests/Object/CMakeLists.txt index c5d1f5476ccdac..1d419eb187d8b9 100644 --- a/llvm/unittests/Object/CMakeLists.txt +++ b/llvm/unittests/Object/CMakeLists.txt @@ -5,6 +5,8 @@ set(LLVM_LINK_COMPONENTS add_llvm_unittest(ObjectTests ArchiveTest.cpp + ELFObjectFileTest.cpp + ELFTest.cpp MinidumpTest.cpp ObjectFileTest.cpp SymbolSizeTest.cpp diff --git a/llvm/unittests/Object/ELFObjectFileTest.cpp b/llvm/unittests/Object/ELFObjectFileTest.cpp new file mode 100644 index 00000000000000..3bbc56b61c6ce4 --- /dev/null +++ b/llvm/unittests/Object/ELFObjectFileTest.cpp @@ -0,0 +1,127 @@ +//===- ELFObjectFileTest.cpp - Tests for ELFObjectFile --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Testing/Support/Error.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::object; + +template +static Expected> create(ArrayRef Data) { + return ELFObjectFile::create( + MemoryBufferRef(toStringRef(Data), "Test buffer")); +} + +// A class to initialize a buffer to represent an ELF object file. +struct DataForTest { + std::vector Data; + + template + std::vector makeElfData(uint8_t Class, uint8_t Encoding, + uint16_t Machine) { + T Ehdr{}; // Zero-initialise the header. + Ehdr.e_ident[ELF::EI_MAG0] = 0x7f; + Ehdr.e_ident[ELF::EI_MAG1] = 'E'; + Ehdr.e_ident[ELF::EI_MAG2] = 'L'; + Ehdr.e_ident[ELF::EI_MAG3] = 'F'; + Ehdr.e_ident[ELF::EI_CLASS] = Class; + Ehdr.e_ident[ELF::EI_DATA] = Encoding; + Ehdr.e_ident[ELF::EI_VERSION] = 1; + Ehdr.e_type = ELF::ET_REL; + Ehdr.e_machine = Machine; + Ehdr.e_version = 1; + Ehdr.e_ehsize = sizeof(T); + + bool IsLittleEndian = Encoding == ELF::ELFDATA2LSB; + if (sys::IsLittleEndianHost != IsLittleEndian) { + sys::swapByteOrder(Ehdr.e_type); + sys::swapByteOrder(Ehdr.e_machine); + sys::swapByteOrder(Ehdr.e_version); + sys::swapByteOrder(Ehdr.e_ehsize); + } + + uint8_t *EhdrBytes = reinterpret_cast(&Ehdr); + std::vector Bytes; + std::copy(EhdrBytes, EhdrBytes + sizeof(Ehdr), std::back_inserter(Bytes)); + return Bytes; + } + + DataForTest(uint8_t Class, uint8_t Encoding, uint16_t Machine) { + if (Class == ELF::ELFCLASS64) + Data = makeElfData(Class, Encoding, Machine); + else { + assert(Class == ELF::ELFCLASS32); + Data = makeElfData(Class, Encoding, Machine); + } + } +}; + +TEST(ELFObjectFileTest, MachineTestForVE) { + DataForTest Data(ELF::ELFCLASS64, ELF::ELFDATA2LSB, ELF::EM_VE); + auto ExpectedFile = create(Data.Data); + ASSERT_THAT_EXPECTED(ExpectedFile, Succeeded()); 
+ const ELFObjectFile &File = *ExpectedFile; + EXPECT_EQ("elf64-ve", File.getFileFormatName()); + EXPECT_EQ(Triple::ve, File.getArch()); +} + +TEST(ELFObjectFileTest, MachineTestForX86_64) { + DataForTest Data(ELF::ELFCLASS64, ELF::ELFDATA2LSB, ELF::EM_X86_64); + auto ExpectedFile = create(Data.Data); + ASSERT_THAT_EXPECTED(ExpectedFile, Succeeded()); + const ELFObjectFile &File = *ExpectedFile; + EXPECT_EQ("elf64-x86-64", File.getFileFormatName()); + EXPECT_EQ(Triple::x86_64, File.getArch()); +} + +TEST(ELFObjectFileTest, MachineTestFor386) { + DataForTest Data(ELF::ELFCLASS32, ELF::ELFDATA2LSB, ELF::EM_386); + auto ExpectedFile = create(Data.Data); + ASSERT_THAT_EXPECTED(ExpectedFile, Succeeded()); + const ELFObjectFile &File = *ExpectedFile; + EXPECT_EQ("elf32-i386", File.getFileFormatName()); + EXPECT_EQ(Triple::x86, File.getArch()); +} + +TEST(ELFObjectFileTest, MachineTestForMIPS) { + { + DataForTest Data(ELF::ELFCLASS64, ELF::ELFDATA2LSB, ELF::EM_MIPS); + auto ExpectedFile = create(Data.Data); + ASSERT_THAT_EXPECTED(ExpectedFile, Succeeded()); + const ELFObjectFile &File = *ExpectedFile; + EXPECT_EQ("elf64-mips", File.getFileFormatName()); + EXPECT_EQ(Triple::mips64el, File.getArch()); + } + { + DataForTest Data(ELF::ELFCLASS64, ELF::ELFDATA2MSB, ELF::EM_MIPS); + auto ExpectedFile = create(Data.Data); + ASSERT_THAT_EXPECTED(ExpectedFile, Succeeded()); + const ELFObjectFile &File = *ExpectedFile; + EXPECT_EQ("elf64-mips", File.getFileFormatName()); + EXPECT_EQ(Triple::mips64, File.getArch()); + } + { + DataForTest Data(ELF::ELFCLASS32, ELF::ELFDATA2LSB, ELF::EM_MIPS); + auto ExpectedFile = create(Data.Data); + ASSERT_THAT_EXPECTED(ExpectedFile, Succeeded()); + const ELFObjectFile &File = *ExpectedFile; + EXPECT_EQ("elf32-mips", File.getFileFormatName()); + EXPECT_EQ(Triple::mipsel, File.getArch()); + } + { + DataForTest Data(ELF::ELFCLASS32, ELF::ELFDATA2MSB, ELF::EM_MIPS); + auto ExpectedFile = create(Data.Data); + ASSERT_THAT_EXPECTED(ExpectedFile, 
Succeeded()); + const ELFObjectFile &File = *ExpectedFile; + EXPECT_EQ("elf32-mips", File.getFileFormatName()); + EXPECT_EQ(Triple::mips, File.getArch()); + } +} diff --git a/llvm/unittests/Object/ELFTest.cpp b/llvm/unittests/Object/ELFTest.cpp new file mode 100644 index 00000000000000..b815d5cdd83902 --- /dev/null +++ b/llvm/unittests/Object/ELFTest.cpp @@ -0,0 +1,56 @@ +//===- ELFTest.cpp - Tests for ELF.cpp ------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/ELF.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::ELF; + +TEST(ELFTest, getELFRelocationTypeNameForVE) { + EXPECT_EQ("R_VE_NONE", getELFRelocationTypeName(EM_VE, R_VE_NONE)); + EXPECT_EQ("R_VE_REFLONG", getELFRelocationTypeName(EM_VE, R_VE_REFLONG)); + EXPECT_EQ("R_VE_REFQUAD", getELFRelocationTypeName(EM_VE, R_VE_REFQUAD)); + EXPECT_EQ("R_VE_SREL32", getELFRelocationTypeName(EM_VE, R_VE_SREL32)); + EXPECT_EQ("R_VE_HI32", getELFRelocationTypeName(EM_VE, R_VE_HI32)); + EXPECT_EQ("R_VE_LO32", getELFRelocationTypeName(EM_VE, R_VE_LO32)); + EXPECT_EQ("R_VE_PC_HI32", getELFRelocationTypeName(EM_VE, R_VE_PC_HI32)); + EXPECT_EQ("R_VE_PC_LO32", getELFRelocationTypeName(EM_VE, R_VE_PC_LO32)); + EXPECT_EQ("R_VE_GOT32", getELFRelocationTypeName(EM_VE, R_VE_GOT32)); + EXPECT_EQ("R_VE_GOT_HI32", getELFRelocationTypeName(EM_VE, R_VE_GOT_HI32)); + EXPECT_EQ("R_VE_GOT_LO32", getELFRelocationTypeName(EM_VE, R_VE_GOT_LO32)); + EXPECT_EQ("R_VE_GOTOFF32", getELFRelocationTypeName(EM_VE, R_VE_GOTOFF32)); + EXPECT_EQ("R_VE_GOTOFF_HI32", + getELFRelocationTypeName(EM_VE, R_VE_GOTOFF_HI32)); + EXPECT_EQ("R_VE_GOTOFF_LO32", + getELFRelocationTypeName(EM_VE, 
R_VE_GOTOFF_LO32)); + EXPECT_EQ("R_VE_PLT32", getELFRelocationTypeName(EM_VE, R_VE_PLT32)); + EXPECT_EQ("R_VE_PLT_HI32", getELFRelocationTypeName(EM_VE, R_VE_PLT_HI32)); + EXPECT_EQ("R_VE_PLT_LO32", getELFRelocationTypeName(EM_VE, R_VE_PLT_LO32)); + EXPECT_EQ("R_VE_RELATIVE", getELFRelocationTypeName(EM_VE, R_VE_RELATIVE)); + EXPECT_EQ("R_VE_GLOB_DAT", getELFRelocationTypeName(EM_VE, R_VE_GLOB_DAT)); + EXPECT_EQ("R_VE_JUMP_SLOT", getELFRelocationTypeName(EM_VE, R_VE_JUMP_SLOT)); + EXPECT_EQ("R_VE_COPY", getELFRelocationTypeName(EM_VE, R_VE_COPY)); + EXPECT_EQ("R_VE_DTPMOD64", getELFRelocationTypeName(EM_VE, R_VE_DTPMOD64)); + EXPECT_EQ("R_VE_DTPOFF64", getELFRelocationTypeName(EM_VE, R_VE_DTPOFF64)); + EXPECT_EQ("R_VE_TLS_GD_HI32", + getELFRelocationTypeName(EM_VE, R_VE_TLS_GD_HI32)); + EXPECT_EQ("R_VE_TLS_GD_LO32", + getELFRelocationTypeName(EM_VE, R_VE_TLS_GD_LO32)); + EXPECT_EQ("R_VE_TPOFF_HI32", + getELFRelocationTypeName(EM_VE, R_VE_TPOFF_HI32)); + EXPECT_EQ("R_VE_TPOFF_LO32", + getELFRelocationTypeName(EM_VE, R_VE_TPOFF_LO32)); + EXPECT_EQ("R_VE_CALL_HI32", getELFRelocationTypeName(EM_VE, R_VE_CALL_HI32)); + EXPECT_EQ("R_VE_CALL_LO32", getELFRelocationTypeName(EM_VE, R_VE_CALL_LO32)); +} + +TEST(ELFTest, getELFRelativeRelocationType) { + EXPECT_EQ(0U, getELFRelativeRelocationType(EM_VE)); +} diff --git a/llvm/unittests/ObjectYAML/CMakeLists.txt b/llvm/unittests/ObjectYAML/CMakeLists.txt index 45e9c672966d92..04a770a46eb380 100644 --- a/llvm/unittests/ObjectYAML/CMakeLists.txt +++ b/llvm/unittests/ObjectYAML/CMakeLists.txt @@ -4,6 +4,7 @@ set(LLVM_LINK_COMPONENTS ) add_llvm_unittest(ObjectYAMLTests + ELFYAMLTest.cpp MinidumpYAMLTest.cpp YAML2ObjTest.cpp YAMLTest.cpp diff --git a/llvm/unittests/ObjectYAML/ELFYAMLTest.cpp b/llvm/unittests/ObjectYAML/ELFYAMLTest.cpp new file mode 100644 index 00000000000000..fdbafc28f0d73e --- /dev/null +++ b/llvm/unittests/ObjectYAML/ELFYAMLTest.cpp @@ -0,0 +1,134 @@ +//===- ELFYAMLTest.cpp - Tests for ELFYAML.cpp 
----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/ELF.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/ELFTypes.h" +#include "llvm/ObjectYAML/yaml2obj.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Testing/Support/Error.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::object; + +template +static Expected> toBinary(SmallVectorImpl &Storage, + StringRef Yaml) { + Storage.clear(); + raw_svector_ostream OS(Storage); + yaml::Input YIn(Yaml); + if (!yaml::convertYAML(YIn, OS, [](const Twine &Msg) {})) + return createStringError(std::errc::invalid_argument, + "unable to convert YAML"); + + return ELFObjectFile::create(MemoryBufferRef(OS.str(), "Binary")); +} + +TEST(ELFRelocationTypeTest, RelocationTestForVE) { + SmallString<0> Storage; + Expected> ExpectedFile = toBinary(Storage, R"( +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_VE +Sections: + - Name: .rela.text + Type: SHT_RELA + Relocations: + - Type: R_VE_NONE + - Type: R_VE_REFLONG + - Type: R_VE_REFQUAD + - Type: R_VE_SREL32 + - Type: R_VE_HI32 + - Type: R_VE_LO32 + - Type: R_VE_PC_HI32 + - Type: R_VE_PC_LO32 + - Type: R_VE_GOT32 + - Type: R_VE_GOT_HI32 + - Type: R_VE_GOT_LO32 + - Type: R_VE_GOTOFF32 + - Type: R_VE_GOTOFF_HI32 + - Type: R_VE_GOTOFF_LO32 + - Type: R_VE_PLT32 + - Type: R_VE_PLT_HI32 + - Type: R_VE_PLT_LO32 + - Type: R_VE_RELATIVE + - Type: R_VE_GLOB_DAT + - Type: R_VE_JUMP_SLOT + - Type: R_VE_COPY + - Type: R_VE_DTPMOD64 + - Type: R_VE_DTPOFF64 + - Type: R_VE_TLS_GD_HI32 + - Type: R_VE_TLS_GD_LO32 + - Type: R_VE_TPOFF_HI32 + - Type: R_VE_TPOFF_LO32 + - Type: R_VE_CALL_HI32 + - Type: R_VE_CALL_LO32)"); + 
ASSERT_THAT_EXPECTED(ExpectedFile, Succeeded()); + const ELFObjectFile &File = *ExpectedFile; + EXPECT_EQ("elf64-ve", File.getFileFormatName()); + EXPECT_EQ(Triple::ve, File.getArch()); + + // Test relocation types. + for (SectionRef Sec : File.sections()) { + Expected NameOrErr = Sec.getName(); + ASSERT_THAT_EXPECTED(NameOrErr, Succeeded()); + StringRef SectionName = *NameOrErr; + if (SectionName != ".rela.text") + continue; + + for (RelocationRef R : Sec.relocations()) { + SmallString<32> RelTypeName; + using namespace llvm::ELF; + +#define NAME_CHECK(ID) \ + case ID: \ + R.getTypeName(RelTypeName); \ + EXPECT_EQ(#ID, RelTypeName); \ + break + + switch (R.getType()) { + NAME_CHECK(R_VE_NONE); + NAME_CHECK(R_VE_REFLONG); + NAME_CHECK(R_VE_REFQUAD); + NAME_CHECK(R_VE_SREL32); + NAME_CHECK(R_VE_HI32); + NAME_CHECK(R_VE_LO32); + NAME_CHECK(R_VE_PC_HI32); + NAME_CHECK(R_VE_PC_LO32); + NAME_CHECK(R_VE_GOT32); + NAME_CHECK(R_VE_GOT_HI32); + NAME_CHECK(R_VE_GOT_LO32); + NAME_CHECK(R_VE_GOTOFF32); + NAME_CHECK(R_VE_GOTOFF_HI32); + NAME_CHECK(R_VE_GOTOFF_LO32); + NAME_CHECK(R_VE_PLT32); + NAME_CHECK(R_VE_PLT_HI32); + NAME_CHECK(R_VE_PLT_LO32); + NAME_CHECK(R_VE_RELATIVE); + NAME_CHECK(R_VE_GLOB_DAT); + NAME_CHECK(R_VE_JUMP_SLOT); + NAME_CHECK(R_VE_COPY); + NAME_CHECK(R_VE_DTPMOD64); + NAME_CHECK(R_VE_DTPOFF64); + NAME_CHECK(R_VE_TLS_GD_HI32); + NAME_CHECK(R_VE_TLS_GD_LO32); + NAME_CHECK(R_VE_TPOFF_HI32); + NAME_CHECK(R_VE_TPOFF_LO32); + NAME_CHECK(R_VE_CALL_HI32); + NAME_CHECK(R_VE_CALL_LO32); + default: + FAIL() << "Found unexpected relocation type: " + Twine(R.getType()); + break; + } + } + } +} diff --git a/llvm/unittests/Support/CommandLineTest.cpp b/llvm/unittests/Support/CommandLineTest.cpp index a6b26b310b9771..3e7ec8d5baf18d 100644 --- a/llvm/unittests/Support/CommandLineTest.cpp +++ b/llvm/unittests/Support/CommandLineTest.cpp @@ -253,8 +253,8 @@ TEST(CommandLineTest, TokenizeGNUCommandLine) { } TEST(CommandLineTest, TokenizeWindowsCommandLine1) { - const char 
Input[] = "a\\b c\\\\d e\\\\\"f g\" h\\\"i j\\\\\\\"k \"lmn\" o pqr " - "\"st \\\"u\" \\v"; + const char Input[] = + R"(a\b c\\d e\\"f g" h\"i j\\\"k "lmn" o pqr "st \"u" \v)"; const char *const Output[] = { "a\\b", "c\\\\d", "e\\f g", "h\"i", "j\\\"k", "lmn", "o", "pqr", "st \"u", "\\v" }; testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input, Output, @@ -268,6 +268,17 @@ TEST(CommandLineTest, TokenizeWindowsCommandLine2) { array_lengthof(Output)); } +TEST(CommandLineTest, TokenizeWindowsCommandLineQuotedLastArgument) { + const char Input1[] = R"(a b c d "")"; + const char *const Output1[] = {"a", "b", "c", "d", ""}; + testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input1, Output1, + array_lengthof(Output1)); + const char Input2[] = R"(a b c d ")"; + const char *const Output2[] = {"a", "b", "c", "d"}; + testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input2, Output2, + array_lengthof(Output2)); +} + TEST(CommandLineTest, TokenizeConfigFile1) { const char *Input = "\\"; const char *const Output[] = { "\\" }; diff --git a/llvm/unittests/Support/FileCheckTest.cpp b/llvm/unittests/Support/FileCheckTest.cpp index 75b7fba8759d86..54646a036f73ff 100644 --- a/llvm/unittests/Support/FileCheckTest.cpp +++ b/llvm/unittests/Support/FileCheckTest.cpp @@ -88,13 +88,16 @@ struct ExpressionFormatParameterisedFixture bool AllowUpperHex; }; +const uint64_t MaxUint64 = std::numeric_limits::max(); + TEST_P(ExpressionFormatParameterisedFixture, Format) { SourceMgr SM; ExpressionFormat Format(Kind); + bool Signed = Kind == ExpressionFormat::Kind::Signed; Expected WildcardPattern = Format.getWildcardRegex(); ASSERT_THAT_EXPECTED(WildcardPattern, Succeeded()); - Regex WildcardRegex(*WildcardPattern); + Regex WildcardRegex((Twine("^") + *WildcardPattern).str()); ASSERT_TRUE(WildcardRegex.isValid()); // Does not match empty string. 
EXPECT_FALSE(WildcardRegex.match("")); @@ -103,6 +106,14 @@ TEST_P(ExpressionFormatParameterisedFixture, Format) { StringRef DecimalDigits = "0123456789"; ASSERT_TRUE(WildcardRegex.match(DecimalDigits, &Matches)); EXPECT_EQ(Matches[0], DecimalDigits); + // Matches negative digits. + StringRef MinusFortyTwo = "-42"; + bool MatchSuccess = WildcardRegex.match(MinusFortyTwo, &Matches); + if (Signed) { + ASSERT_TRUE(MatchSuccess); + EXPECT_EQ(Matches[0], MinusFortyTwo); + } else + EXPECT_FALSE(MatchSuccess); // Check non digits or digits with wrong casing are not matched. if (AllowHex) { StringRef HexOnlyDigits[] = {"abcdef", "ABCDEF"}; @@ -121,42 +132,75 @@ TEST_P(ExpressionFormatParameterisedFixture, Format) { EXPECT_FALSE(WildcardRegex.match("A")); } - Expected MatchingString = Format.getMatchingString(0U); + Expected MatchingString = + Format.getMatchingString(ExpressionValue(0u)); ASSERT_THAT_EXPECTED(MatchingString, Succeeded()); EXPECT_EQ(*MatchingString, "0"); - MatchingString = Format.getMatchingString(9U); + MatchingString = Format.getMatchingString(ExpressionValue(9u)); ASSERT_THAT_EXPECTED(MatchingString, Succeeded()); EXPECT_EQ(*MatchingString, "9"); - Expected TenMatchingString = Format.getMatchingString(10U); + MatchingString = Format.getMatchingString(ExpressionValue(-5)); + if (Signed) { + ASSERT_THAT_EXPECTED(MatchingString, Succeeded()); + EXPECT_EQ(*MatchingString, "-5"); + } else { + // Error message tested in ExpressionValue unit tests. 
+ EXPECT_THAT_EXPECTED(MatchingString, Failed()); + } + Expected MaxUint64MatchingString = + Format.getMatchingString(ExpressionValue(MaxUint64)); + Expected TenMatchingString = + Format.getMatchingString(ExpressionValue(10u)); ASSERT_THAT_EXPECTED(TenMatchingString, Succeeded()); - Expected FifteenMatchingString = Format.getMatchingString(15U); + Expected FifteenMatchingString = + Format.getMatchingString(ExpressionValue(15u)); ASSERT_THAT_EXPECTED(FifteenMatchingString, Succeeded()); StringRef ExpectedTenMatchingString, ExpectedFifteenMatchingString; + std::string MaxUint64Str; if (AllowHex) { if (AllowUpperHex) { + MaxUint64Str = "FFFFFFFFFFFFFFFF"; ExpectedTenMatchingString = "A"; ExpectedFifteenMatchingString = "F"; } else { + MaxUint64Str = "ffffffffffffffff"; ExpectedTenMatchingString = "a"; ExpectedFifteenMatchingString = "f"; } } else { + MaxUint64Str = std::to_string(MaxUint64); ExpectedTenMatchingString = "10"; ExpectedFifteenMatchingString = "15"; } + if (Signed) { + // Error message tested in ExpressionValue unit tests. 
+ EXPECT_THAT_EXPECTED(MaxUint64MatchingString, Failed()); + } else { + ASSERT_THAT_EXPECTED(MaxUint64MatchingString, Succeeded()); + EXPECT_EQ(*MaxUint64MatchingString, MaxUint64Str); + } EXPECT_EQ(*TenMatchingString, ExpectedTenMatchingString); EXPECT_EQ(*FifteenMatchingString, ExpectedFifteenMatchingString); StringRef BufferizedValidValueStr = bufferize(SM, "0"); - Expected Val = + Expected Val = Format.valueFromStringRepr(BufferizedValidValueStr, SM); ASSERT_THAT_EXPECTED(Val, Succeeded()); - EXPECT_EQ(*Val, 0U); + EXPECT_EQ(cantFail(Val->getSignedValue()), 0); BufferizedValidValueStr = bufferize(SM, "9"); Val = Format.valueFromStringRepr(BufferizedValidValueStr, SM); ASSERT_THAT_EXPECTED(Val, Succeeded()); - EXPECT_EQ(*Val, 9U); - StringRef BufferizedTenStr, BufferizedInvalidTenStr, BufferizedFifteenStr; + EXPECT_EQ(cantFail(Val->getSignedValue()), 9); + StringRef BufferizedMinusFiveStr = bufferize(SM, "-5"); + Val = Format.valueFromStringRepr(BufferizedMinusFiveStr, SM); + StringRef OverflowErrorStr = "unable to represent numeric value"; + if (Signed) { + ASSERT_THAT_EXPECTED(Val, Succeeded()); + EXPECT_EQ(cantFail(Val->getSignedValue()), -5); + } else + expectDiagnosticError(OverflowErrorStr, Val.takeError()); + StringRef BufferizedMaxUint64Str, BufferizedTenStr, BufferizedInvalidTenStr, + BufferizedFifteenStr; StringRef TenStr, FifteenStr, InvalidTenStr; if (AllowHex) { if (AllowUpperHex) { @@ -173,19 +217,27 @@ TEST_P(ExpressionFormatParameterisedFixture, Format) { FifteenStr = "15"; InvalidTenStr = "A"; } + BufferizedMaxUint64Str = bufferize(SM, MaxUint64Str); + Val = Format.valueFromStringRepr(BufferizedMaxUint64Str, SM); + if (Signed) + expectDiagnosticError(OverflowErrorStr, Val.takeError()); + else { + ASSERT_THAT_EXPECTED(Val, Succeeded()); + EXPECT_EQ(cantFail(Val->getUnsignedValue()), MaxUint64); + } BufferizedTenStr = bufferize(SM, TenStr); Val = Format.valueFromStringRepr(BufferizedTenStr, SM); ASSERT_THAT_EXPECTED(Val, Succeeded()); - 
EXPECT_EQ(*Val, 10U); + EXPECT_EQ(cantFail(Val->getSignedValue()), 10); BufferizedFifteenStr = bufferize(SM, FifteenStr); Val = Format.valueFromStringRepr(BufferizedFifteenStr, SM); ASSERT_THAT_EXPECTED(Val, Succeeded()); - EXPECT_EQ(*Val, 15U); + EXPECT_EQ(cantFail(Val->getSignedValue()), 15); // Wrong casing is not tested because valueFromStringRepr() relies on // StringRef's getAsInteger() which does not allow to restrict casing. BufferizedInvalidTenStr = bufferize(SM, InvalidTenStr); expectDiagnosticError( - "unable to represent numeric value", + OverflowErrorStr, Format.valueFromStringRepr(bufferize(SM, "G"), SM).takeError()); // Check boolean operator. @@ -197,6 +249,8 @@ INSTANTIATE_TEST_CASE_P( ::testing::Values( std::make_tuple(ExpressionFormat::Kind::Unsigned, /*AllowHex=*/false, /*AllowUpperHex=*/false), + std::make_tuple(ExpressionFormat::Kind::Signed, /*AllowHex=*/false, + /*AllowUpperHex=*/false), std::make_tuple(ExpressionFormat::Kind::HexLower, /*AllowHex=*/true, /*AllowUpperHex=*/false), std::make_tuple(ExpressionFormat::Kind::HexUpper, /*AllowHex=*/true, @@ -206,8 +260,9 @@ TEST_F(FileCheckTest, NoFormatProperties) { ExpressionFormat NoFormat(ExpressionFormat::Kind::NoFormat); expectError("trying to match value with invalid format", NoFormat.getWildcardRegex().takeError()); - expectError("trying to match value with invalid format", - NoFormat.getMatchingString(18).takeError()); + expectError( + "trying to match value with invalid format", + NoFormat.getMatchingString(ExpressionValue(18u)).takeError()); EXPECT_FALSE(bool(NoFormat)); } @@ -238,31 +293,221 @@ TEST_F(FileCheckTest, FormatKindEqualityOperators) { EXPECT_FALSE(NoFormat != ExpressionFormat::Kind::NoFormat); } +template +static Expected doValueOperation(binop_eval_t Operation, + T1 LeftValue, T2 RightValue) { + ExpressionValue LeftOperand(LeftValue); + ExpressionValue RightOperand(RightValue); + return Operation(LeftOperand, RightOperand); +} + +template +static void 
expectValueEqual(ExpressionValue ActualValue, T ExpectedValue) { + EXPECT_EQ(ExpectedValue < 0, ActualValue.isNegative()); + if (ExpectedValue < 0) { + Expected SignedActualValue = ActualValue.getSignedValue(); + ASSERT_THAT_EXPECTED(SignedActualValue, Succeeded()); + EXPECT_EQ(*SignedActualValue, static_cast(ExpectedValue)); + } else { + Expected UnsignedActualValue = ActualValue.getUnsignedValue(); + ASSERT_THAT_EXPECTED(UnsignedActualValue, Succeeded()); + EXPECT_EQ(*UnsignedActualValue, static_cast(ExpectedValue)); + } +} + +template +static void expectOperationValueResult(binop_eval_t Operation, T1 LeftValue, + T2 RightValue, TR ResultValue) { + Expected OperationResult = + doValueOperation(Operation, LeftValue, RightValue); + ASSERT_THAT_EXPECTED(OperationResult, Succeeded()); + expectValueEqual(*OperationResult, ResultValue); +} + +template +static void expectOperationValueResult(binop_eval_t Operation, T1 LeftValue, + T2 RightValue) { + expectError( + "overflow error", + doValueOperation(Operation, LeftValue, RightValue).takeError()); +} + +const int64_t MinInt64 = std::numeric_limits::min(); +const int64_t MaxInt64 = std::numeric_limits::max(); + +TEST_F(FileCheckTest, ExpressionValueGetUnsigned) { + // Test positive value. + Expected UnsignedValue = ExpressionValue(10).getUnsignedValue(); + ASSERT_THAT_EXPECTED(UnsignedValue, Succeeded()); + EXPECT_EQ(*UnsignedValue, 10U); + + // Test 0. + UnsignedValue = ExpressionValue(0).getUnsignedValue(); + ASSERT_THAT_EXPECTED(UnsignedValue, Succeeded()); + EXPECT_EQ(*UnsignedValue, 0U); + + // Test max positive value. + UnsignedValue = ExpressionValue(MaxUint64).getUnsignedValue(); + ASSERT_THAT_EXPECTED(UnsignedValue, Succeeded()); + EXPECT_EQ(*UnsignedValue, MaxUint64); + + // Test failure with negative value. + expectError( + "overflow error", ExpressionValue(-1).getUnsignedValue().takeError()); + + // Test failure with min negative value. 
+ expectError( + "overflow error", + ExpressionValue(MinInt64).getUnsignedValue().takeError()); +} + +TEST_F(FileCheckTest, ExpressionValueGetSigned) { + // Test positive value. + Expected SignedValue = ExpressionValue(10).getSignedValue(); + ASSERT_THAT_EXPECTED(SignedValue, Succeeded()); + EXPECT_EQ(*SignedValue, 10); + + // Test 0. + SignedValue = ExpressionValue(0).getSignedValue(); + ASSERT_THAT_EXPECTED(SignedValue, Succeeded()); + EXPECT_EQ(*SignedValue, 0); + + // Test max int64_t. + SignedValue = ExpressionValue(MaxInt64).getSignedValue(); + ASSERT_THAT_EXPECTED(SignedValue, Succeeded()); + EXPECT_EQ(*SignedValue, MaxInt64); + + // Test failure with too big positive value. + expectError( + "overflow error", ExpressionValue(static_cast(MaxInt64) + 1) + .getSignedValue() + .takeError()); + + // Test failure with max uint64_t. + expectError( + "overflow error", + ExpressionValue(MaxUint64).getSignedValue().takeError()); + + // Test negative value. + SignedValue = ExpressionValue(-10).getSignedValue(); + ASSERT_THAT_EXPECTED(SignedValue, Succeeded()); + EXPECT_EQ(*SignedValue, -10); + + // Test min int64_t. + SignedValue = ExpressionValue(MinInt64).getSignedValue(); + ASSERT_THAT_EXPECTED(SignedValue, Succeeded()); + EXPECT_EQ(*SignedValue, MinInt64); +} + +TEST_F(FileCheckTest, ExpressionValueAbsolute) { + // Test positive value. + expectValueEqual(ExpressionValue(10).getAbsolute(), 10); + + // Test 0. + expectValueEqual(ExpressionValue(0).getAbsolute(), 0); + + // Test max uint64_t. + expectValueEqual(ExpressionValue(MaxUint64).getAbsolute(), MaxUint64); + + // Test negative value. + expectValueEqual(ExpressionValue(-10).getAbsolute(), 10); + + // Test absence of overflow on min int64_t. + expectValueEqual(ExpressionValue(MinInt64).getAbsolute(), + static_cast(-(MinInt64 + 10)) + 10); +} + +TEST_F(FileCheckTest, ExpressionValueAddition) { + // Test both negative values. 
+ expectOperationValueResult(operator+, -10, -10, -20); + + // Test both negative values with underflow. + expectOperationValueResult(operator+, MinInt64, -1); + expectOperationValueResult(operator+, MinInt64, MinInt64); + + // Test negative and positive value. + expectOperationValueResult(operator+, -10, 10, 0); + expectOperationValueResult(operator+, -10, 11, 1); + expectOperationValueResult(operator+, -11, 10, -1); + + // Test positive and negative value. + expectOperationValueResult(operator+, 10, -10, 0); + expectOperationValueResult(operator+, 10, -11, -1); + expectOperationValueResult(operator+, 11, -10, 1); + + // Test both positive values. + expectOperationValueResult(operator+, 10, 10, 20); + + // Test both positive values with overflow. + expectOperationValueResult(operator+, MaxUint64, 1); + expectOperationValueResult(operator+, MaxUint64, MaxUint64); +} + +TEST_F(FileCheckTest, ExpressionValueSubtraction) { + // Test negative value and value bigger than int64_t max. + expectOperationValueResult(operator-, -10, MaxUint64); + + // Test negative and positive value with underflow. + expectOperationValueResult(operator-, MinInt64, 1); + + // Test negative and positive value. + expectOperationValueResult(operator-, -10, 10, -20); + + // Test both negative values. + expectOperationValueResult(operator-, -10, -10, 0); + expectOperationValueResult(operator-, -11, -10, -1); + expectOperationValueResult(operator-, -10, -11, 1); + + // Test positive and negative values. + expectOperationValueResult(operator-, 10, -10, 20); + + // Test both positive values with result positive. + expectOperationValueResult(operator-, 10, 5, 5); + + // Test both positive values with underflow. 
+ expectOperationValueResult(operator-, 0, MaxUint64); + expectOperationValueResult(operator-, 0, + static_cast(-(MinInt64 + 10)) + 11); + + // Test both positive values with result < -(max int64_t) + expectOperationValueResult(operator-, 10, + static_cast(MaxInt64) + 11, + -MaxInt64 - 1); + + // Test both positive values with 0 > result > -(max int64_t) + expectOperationValueResult(operator-, 10, 11, -1); +} + TEST_F(FileCheckTest, Literal) { SourceMgr SM; // Eval returns the literal's value. - ExpressionLiteral Ten(bufferize(SM, "10"), 10); - Expected Value = Ten.eval(); + ExpressionLiteral Ten(bufferize(SM, "10"), 10u); + Expected Value = Ten.eval(); ASSERT_THAT_EXPECTED(Value, Succeeded()); - EXPECT_EQ(10U, *Value); + EXPECT_EQ(10, cantFail(Value->getSignedValue())); Expected ImplicitFormat = Ten.getImplicitFormat(SM); ASSERT_THAT_EXPECTED(ImplicitFormat, Succeeded()); EXPECT_EQ(*ImplicitFormat, ExpressionFormat::Kind::NoFormat); + // Min value can be correctly represented. + ExpressionLiteral Min(bufferize(SM, std::to_string(MinInt64)), MinInt64); + Value = Min.eval(); + ASSERT_TRUE(bool(Value)); + EXPECT_EQ(MinInt64, cantFail(Value->getSignedValue())); + // Max value can be correctly represented. 
- uint64_t MaxUint64 = std::numeric_limits::max(); ExpressionLiteral Max(bufferize(SM, std::to_string(MaxUint64)), MaxUint64); Value = Max.eval(); ASSERT_THAT_EXPECTED(Value, Succeeded()); - EXPECT_EQ(std::numeric_limits::max(), *Value); + EXPECT_EQ(MaxUint64, cantFail(Value->getUnsignedValue())); } TEST_F(FileCheckTest, Expression) { SourceMgr SM; std::unique_ptr Ten = - std::make_unique(bufferize(SM, "10"), 10); + std::make_unique(bufferize(SM, "10"), 10u); ExpressionLiteral *TenPtr = Ten.get(); Expression Expr(std::move(Ten), ExpressionFormat(ExpressionFormat::Kind::HexLower)); @@ -283,8 +528,6 @@ expectUndefErrors(std::unordered_set ExpectedUndefVarNames, EXPECT_TRUE(ExpectedUndefVarNames.empty()) << toString(ExpectedUndefVarNames); } -uint64_t doAdd(uint64_t OpL, uint64_t OpR) { return OpL + OpR; } - TEST_F(FileCheckTest, NumericVariable) { SourceMgr SM; @@ -299,18 +542,18 @@ TEST_F(FileCheckTest, NumericVariable) { ASSERT_THAT_EXPECTED(ImplicitFormat, Succeeded()); EXPECT_EQ(*ImplicitFormat, ExpressionFormat::Kind::Unsigned); EXPECT_FALSE(FooVar.getValue()); - Expected EvalResult = FooVarUse.eval(); + Expected EvalResult = FooVarUse.eval(); expectUndefErrors({"FOO"}, EvalResult.takeError()); - FooVar.setValue(42); + FooVar.setValue(ExpressionValue(42u)); // Defined variable: getValue and eval return value set. - Optional Value = FooVar.getValue(); + Optional Value = FooVar.getValue(); ASSERT_TRUE(Value); - EXPECT_EQ(42U, *Value); + EXPECT_EQ(42, cantFail(Value->getSignedValue())); EvalResult = FooVarUse.eval(); ASSERT_THAT_EXPECTED(EvalResult, Succeeded()); - EXPECT_EQ(42U, *EvalResult); + EXPECT_EQ(42, cantFail(EvalResult->getSignedValue())); // Clearing variable: getValue and eval fail. Error returned by eval holds // the name of the cleared variable. 
@@ -327,23 +570,24 @@ TEST_F(FileCheckTest, Binop) { StringRef FooStr = ExprStr.take_front(3); NumericVariable FooVar(FooStr, ExpressionFormat(ExpressionFormat::Kind::Unsigned), 1); - FooVar.setValue(42); + FooVar.setValue(ExpressionValue(42u)); std::unique_ptr FooVarUse = std::make_unique(FooStr, &FooVar); StringRef BarStr = ExprStr.take_back(3); NumericVariable BarVar(BarStr, ExpressionFormat(ExpressionFormat::Kind::Unsigned), 2); - BarVar.setValue(18); + BarVar.setValue(ExpressionValue(18u)); std::unique_ptr BarVarUse = std::make_unique(BarStr, &BarVar); + binop_eval_t doAdd = operator+; BinaryOperation Binop(ExprStr, doAdd, std::move(FooVarUse), std::move(BarVarUse)); // Defined variables: eval returns right value; implicit format is as // expected. - Expected Value = Binop.eval(); + Expected Value = Binop.eval(); ASSERT_THAT_EXPECTED(Value, Succeeded()); - EXPECT_EQ(60U, *Value); + EXPECT_EQ(60, cantFail(Value->getSignedValue())); Expected ImplicitFormat = Binop.getImplicitFormat(SM); ASSERT_THAT_EXPECTED(ImplicitFormat, Succeeded()); EXPECT_EQ(*ImplicitFormat, ExpressionFormat::Kind::Unsigned); @@ -366,7 +610,7 @@ TEST_F(FileCheckTest, Binop) { StringRef EighteenStr = ExprStr.take_back(2); FooVarUse = std::make_unique(FooStr, &FooVar); std::unique_ptr Eighteen = - std::make_unique(EighteenStr, 18); + std::make_unique(EighteenStr, 18u); Binop = BinaryOperation(ExprStr, doAdd, std::move(FooVarUse), std::move(Eighteen)); ImplicitFormat = Binop.getImplicitFormat(SM); @@ -376,7 +620,7 @@ TEST_F(FileCheckTest, Binop) { FooStr = ExprStr.take_back(3); EighteenStr = ExprStr.take_front(2); FooVarUse = std::make_unique(FooStr, &FooVar); - Eighteen = std::make_unique(EighteenStr, 18); + Eighteen = std::make_unique(EighteenStr, 18u); Binop = BinaryOperation(ExprStr, doAdd, std::move(Eighteen), std::move(FooVarUse)); ImplicitFormat = Binop.getImplicitFormat(SM); @@ -655,6 +899,13 @@ TEST_F(FileCheckTest, ParseNumericSubstitutionBlock) { // Valid single operand expression. 
EXPECT_THAT_EXPECTED(Tester.parseSubst("FOO"), Succeeded()); + EXPECT_THAT_EXPECTED(Tester.parseSubst("18"), Succeeded()); + EXPECT_THAT_EXPECTED(Tester.parseSubst(std::to_string(MaxUint64)), + Succeeded()); + EXPECT_THAT_EXPECTED(Tester.parseSubst("0x12"), Succeeded()); + EXPECT_THAT_EXPECTED(Tester.parseSubst("-30"), Succeeded()); + EXPECT_THAT_EXPECTED(Tester.parseSubst(std::to_string(MinInt64)), + Succeeded()); // Invalid format. expectDiagnosticError("invalid matching format specification in expression", @@ -697,6 +948,7 @@ TEST_F(FileCheckTest, ParseNumericSubstitutionBlock) { // Valid expression with format specifier. EXPECT_THAT_EXPECTED(Tester.parseSubst("%u, FOO"), Succeeded()); + EXPECT_THAT_EXPECTED(Tester.parseSubst("%d, FOO"), Succeeded()); EXPECT_THAT_EXPECTED(Tester.parseSubst("%x, FOO"), Succeeded()); EXPECT_THAT_EXPECTED(Tester.parseSubst("%X, FOO"), Succeeded()); @@ -804,7 +1056,14 @@ TEST_F(FileCheckTest, ParsePattern) { TEST_F(FileCheckTest, Match) { PatternTester Tester; + // Check a substitution error is diagnosed. + ASSERT_FALSE(Tester.parsePattern("[[#%u, -1]]")); + expectDiagnosticError( + "unable to substitute variable or numeric expression: overflow error", + Tester.match("").takeError()); + // Check matching an empty expression only matches a number. + Tester.initNextPattern(); ASSERT_FALSE(Tester.parsePattern("[[#]]")); expectNotFoundError(Tester.match("FAIL").takeError()); EXPECT_THAT_EXPECTED(Tester.match("18"), Succeeded()); @@ -946,7 +1205,7 @@ TEST_F(FileCheckTest, Substitution) { // substituted for the variable's value. 
NumericVariable NVar("N", ExpressionFormat(ExpressionFormat::Kind::Unsigned), 1); - NVar.setValue(10); + NVar.setValue(ExpressionValue(10u)); auto NVarUse = std::make_unique("N", &NVar); auto ExpressionN = std::make_unique( std::move(NVarUse), ExpressionFormat(ExpressionFormat::Kind::HexUpper)); @@ -1056,24 +1315,24 @@ TEST_F(FileCheckTest, FileCheckContext) { Expected EmptyVar = Cxt.getPatternVarValue(EmptyVarStr); Expected UnknownVar = Cxt.getPatternVarValue(UnknownVarStr); ASSERT_THAT_EXPECTED(ExpressionPointer, Succeeded()); - Expected ExpressionVal = (*ExpressionPointer)->getAST()->eval(); + Expected ExpressionVal = + (*ExpressionPointer)->getAST()->eval(); ASSERT_THAT_EXPECTED(ExpressionVal, Succeeded()); - EXPECT_EQ(*ExpressionVal, 18U); + EXPECT_EQ(cantFail(ExpressionVal->getSignedValue()), 18); ExpressionPointer = P.parseNumericSubstitutionBlock( LocalNumVar2Ref, DefinedNumericVariable, /*IsLegacyLineExpr=*/false, LineNumber, &Cxt, SM); ASSERT_THAT_EXPECTED(ExpressionPointer, Succeeded()); ExpressionVal = (*ExpressionPointer)->getAST()->eval(); ASSERT_THAT_EXPECTED(ExpressionVal, Succeeded()); - EXPECT_EQ(*ExpressionVal, 20U); - ExpressionPointer = - P.parseNumericSubstitutionBlock(LocalNumVar3Ref, DefinedNumericVariable, - /*IsLegacyLineExpr=*/false, - LineNumber, &Cxt, SM); + EXPECT_EQ(cantFail(ExpressionVal->getSignedValue()), 20); + ExpressionPointer = P.parseNumericSubstitutionBlock( + LocalNumVar3Ref, DefinedNumericVariable, + /*IsLegacyLineExpr=*/false, LineNumber, &Cxt, SM); ASSERT_THAT_EXPECTED(ExpressionPointer, Succeeded()); ExpressionVal = (*ExpressionPointer)->getAST()->eval(); ASSERT_THAT_EXPECTED(ExpressionVal, Succeeded()); - EXPECT_EQ(*ExpressionVal, 12U); + EXPECT_EQ(cantFail(ExpressionVal->getSignedValue()), 12); ASSERT_THAT_EXPECTED(EmptyVar, Succeeded()); EXPECT_EQ(*EmptyVar, ""); expectUndefErrors({std::string(UnknownVarStr)}, UnknownVar.takeError()); @@ -1123,7 +1382,7 @@ TEST_F(FileCheckTest, FileCheckContext) { 
ASSERT_THAT_EXPECTED(ExpressionPointer, Succeeded()); ExpressionVal = (*ExpressionPointer)->getAST()->eval(); ASSERT_THAT_EXPECTED(ExpressionVal, Succeeded()); - EXPECT_EQ(*ExpressionVal, 36U); + EXPECT_EQ(cantFail(ExpressionVal->getSignedValue()), 36); // Clear local variables and check global variables remain defined. Cxt.clearLocalVars(); @@ -1135,6 +1394,6 @@ TEST_F(FileCheckTest, FileCheckContext) { ASSERT_THAT_EXPECTED(ExpressionPointer, Succeeded()); ExpressionVal = (*ExpressionPointer)->getAST()->eval(); ASSERT_THAT_EXPECTED(ExpressionVal, Succeeded()); - EXPECT_EQ(*ExpressionVal, 36U); + EXPECT_EQ(cantFail(ExpressionVal->getSignedValue()), 36); } } // namespace diff --git a/llvm/utils/TableGen/AsmWriterEmitter.cpp b/llvm/utils/TableGen/AsmWriterEmitter.cpp index 72c7baa139a9d6..d10ea71e97e3f1 100644 --- a/llvm/utils/TableGen/AsmWriterEmitter.cpp +++ b/llvm/utils/TableGen/AsmWriterEmitter.cpp @@ -267,6 +267,27 @@ static void UnescapeString(std::string &Str) { } } +/// UnescapeAliasString - Supports literal braces in InstAlias asm string which +/// are escaped with '\\' to avoid being interpreted as variants. Braces must +/// be unescaped before c++ code is generated as (e.g.): +/// +/// AsmString = "foo \{$\x01\}"; +/// +/// causes non-standard escape character warnings. +static void UnescapeAliasString(std::string &Str) { + for (unsigned i = 0; i != Str.size(); ++i) { + if (Str[i] == '\\' && i != Str.size()-1) { + switch (Str[i+1]) { + default: continue; // Don't execute the code after the switch. + case '{': Str[i] = '{'; break; + case '}': Str[i] = '}'; break; + } + // Nuke the second character. + Str.erase(Str.begin()+i+1); + } + } +} + /// EmitPrintInstruction - Generate the code for the "printInstruction" method /// implementation. Destroys all instances of AsmWriterInst information, by /// clearing the Instructions vector. 
@@ -803,6 +824,7 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) { std::string FlatAliasAsmString = CodeGenInstruction::FlattenAsmStringVariants(CGA.AsmString, Variant); + UnescapeAliasString(FlatAliasAsmString); // Don't emit the alias if it has more operands than what it's aliasing. if (NumResultOps < CountNumOperands(FlatAliasAsmString, Variant)) diff --git a/llvm/utils/TableGen/CodeGenIntrinsics.h b/llvm/utils/TableGen/CodeGenIntrinsics.h index 824bb944753bf6..6503f39cfd8ead 100644 --- a/llvm/utils/TableGen/CodeGenIntrinsics.h +++ b/llvm/utils/TableGen/CodeGenIntrinsics.h @@ -142,17 +142,32 @@ struct CodeGenIntrinsic { // True if the intrinsic is marked as speculatable. bool isSpeculatable; - enum ArgAttribute { + enum ArgAttrKind { NoCapture, NoAlias, Returned, ReadOnly, WriteOnly, ReadNone, - ImmArg + ImmArg, + Alignment }; - std::vector> ArgumentAttributes; + struct ArgAttribute { + unsigned Index; + ArgAttrKind Kind; + uint64_t Value; + + ArgAttribute(unsigned Idx, ArgAttrKind K, uint64_t V) + : Index(Idx), Kind(K), Value(V) {} + + bool operator<(const ArgAttribute &Other) const { + return std::tie(Index, Kind, Value) < + std::tie(Other.Index, Other.Kind, Other.Value); + } + }; + + std::vector ArgumentAttributes; bool hasProperty(enum SDNP Prop) const { return Properties & (1 << Prop); diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp index 282e62cf838e0a..78fb7328770130 100644 --- a/llvm/utils/TableGen/CodeGenTarget.cpp +++ b/llvm/utils/TableGen/CodeGenTarget.cpp @@ -795,25 +795,29 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) { hasSideEffects = true; else if (Property->isSubClassOf("NoCapture")) { unsigned ArgNo = Property->getValueAsInt("ArgNo"); - ArgumentAttributes.push_back(std::make_pair(ArgNo, NoCapture)); + ArgumentAttributes.emplace_back(ArgNo, NoCapture, 0); } else if (Property->isSubClassOf("NoAlias")) { unsigned ArgNo = Property->getValueAsInt("ArgNo"); - 
ArgumentAttributes.push_back(std::make_pair(ArgNo, NoAlias)); + ArgumentAttributes.emplace_back(ArgNo, NoAlias, 0); } else if (Property->isSubClassOf("Returned")) { unsigned ArgNo = Property->getValueAsInt("ArgNo"); - ArgumentAttributes.push_back(std::make_pair(ArgNo, Returned)); + ArgumentAttributes.emplace_back(ArgNo, Returned, 0); } else if (Property->isSubClassOf("ReadOnly")) { unsigned ArgNo = Property->getValueAsInt("ArgNo"); - ArgumentAttributes.push_back(std::make_pair(ArgNo, ReadOnly)); + ArgumentAttributes.emplace_back(ArgNo, ReadOnly, 0); } else if (Property->isSubClassOf("WriteOnly")) { unsigned ArgNo = Property->getValueAsInt("ArgNo"); - ArgumentAttributes.push_back(std::make_pair(ArgNo, WriteOnly)); + ArgumentAttributes.emplace_back(ArgNo, WriteOnly, 0); } else if (Property->isSubClassOf("ReadNone")) { unsigned ArgNo = Property->getValueAsInt("ArgNo"); - ArgumentAttributes.push_back(std::make_pair(ArgNo, ReadNone)); + ArgumentAttributes.emplace_back(ArgNo, ReadNone, 0); } else if (Property->isSubClassOf("ImmArg")) { unsigned ArgNo = Property->getValueAsInt("ArgNo"); - ArgumentAttributes.push_back(std::make_pair(ArgNo, ImmArg)); + ArgumentAttributes.emplace_back(ArgNo, ImmArg, 0); + } else if (Property->isSubClassOf("Align")) { + unsigned ArgNo = Property->getValueAsInt("ArgNo"); + uint64_t Align = Property->getValueAsInt("Align"); + ArgumentAttributes.emplace_back(ArgNo, Alignment, Align); } else llvm_unreachable("Unknown property!"); } @@ -833,7 +837,8 @@ bool CodeGenIntrinsic::isParamAPointer(unsigned ParamIdx) const { } bool CodeGenIntrinsic::isParamImmArg(unsigned ParamIdx) const { - std::pair Val = {ParamIdx, ImmArg}; + // Convert argument index to attribute index starting from `FirstArgIndex`. 
+ ArgAttribute Val{ParamIdx + 1, ImmArg, 0}; return std::binary_search(ArgumentAttributes.begin(), ArgumentAttributes.end(), Val); } diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp index f05fd9fd39fe23..ab42f33cf23fdf 100644 --- a/llvm/utils/TableGen/IntrinsicEmitter.cpp +++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp @@ -663,14 +663,15 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints, unsigned ai = 0, ae = intrinsic.ArgumentAttributes.size(); if (ae) { while (ai != ae) { - unsigned argNo = intrinsic.ArgumentAttributes[ai].first; - unsigned attrIdx = argNo + 1; // Must match AttributeList::FirstArgIndex + unsigned attrIdx = intrinsic.ArgumentAttributes[ai].Index; OS << " const Attribute::AttrKind AttrParam" << attrIdx << "[]= {"; bool addComma = false; + bool AllValuesAreZero = true; + SmallVector Values; do { - switch (intrinsic.ArgumentAttributes[ai].second) { + switch (intrinsic.ArgumentAttributes[ai].Kind) { case CodeGenIntrinsic::NoCapture: if (addComma) OS << ","; @@ -713,13 +714,39 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints, OS << "Attribute::ImmArg"; addComma = true; break; + case CodeGenIntrinsic::Alignment: + if (addComma) + OS << ','; + OS << "Attribute::Alignment"; + addComma = true; + break; } + uint64_t V = intrinsic.ArgumentAttributes[ai].Value; + Values.push_back(V); + AllValuesAreZero &= (V == 0); ++ai; - } while (ai != ae && intrinsic.ArgumentAttributes[ai].first == argNo); + } while (ai != ae && intrinsic.ArgumentAttributes[ai].Index == attrIdx); OS << "};\n"; + + // Generate attribute value array if not all attribute values are zero. 
+ if (!AllValuesAreZero) { + OS << " const uint64_t AttrValParam" << attrIdx << "[]= {"; + addComma = false; + for (const auto V : Values) { + if (addComma) + OS << ','; + OS << V; + addComma = true; + } + OS << "};\n"; + } + OS << " AS[" << numAttrs++ << "] = AttributeList::get(C, " - << attrIdx << ", AttrParam" << attrIdx << ");\n"; + << attrIdx << ", AttrParam" << attrIdx; + if (!AllValuesAreZero) + OS << ", AttrValParam" << attrIdx; + OS << ");\n"; } } diff --git a/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn index 0629d9c19231fc..1ada201611d5cb 100644 --- a/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn @@ -129,6 +129,7 @@ static_library("Checkers") { "ValistChecker.cpp", "VforkChecker.cpp", "VirtualCallChecker.cpp", + "WebKit/NoUncountedMembersChecker.cpp", "WebKit/PtrTypesSemantics.cpp", "WebKit/RefCntblBaseVirtualDtorChecker.cpp", "cert/PutenvWithAutoChecker.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/Analysis/ML/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Analysis/ML/BUILD.gn new file mode 100644 index 00000000000000..86b86252eed052 --- /dev/null +++ b/llvm/utils/gn/secondary/llvm/lib/Analysis/ML/BUILD.gn @@ -0,0 +1,10 @@ +static_library("MLPolicies") { + output_name = "LLVMMLPolicies" + deps = [ + "//llvm/lib/IR", + "//llvm/lib/Support", + ] + sources = [ + "InlineFeaturesAnalysis.cpp", + ] +} diff --git a/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn index 5044fc16a7aacc..e1779112abe423 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn @@ -2,6 +2,7 @@ static_library("Passes") { output_name = "LLVMPasses" deps = [ "//llvm/lib/Analysis", + "//llvm/lib/Analysis/ML:MLPolicies", "//llvm/lib/CodeGen", "//llvm/lib/IR", "//llvm/lib/Support", diff --git 
a/llvm/utils/gn/secondary/llvm/unittests/Analysis/ML/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Analysis/ML/BUILD.gn new file mode 100644 index 00000000000000..8c924603358bad --- /dev/null +++ b/llvm/utils/gn/secondary/llvm/unittests/Analysis/ML/BUILD.gn @@ -0,0 +1,15 @@ +import("//llvm/utils/unittest/unittest.gni") + +unittest("MLAnalysisTests") { + deps = [ + "//llvm/lib/Analysis", + "//llvm/lib/AsmParser", + "//llvm/lib/IR", + "//llvm/lib/Analysis/ML:MLPolicies", + "//llvm/lib/Support", + "//llvm/lib/Transforms/Utils", + ] + sources = [ + "InlineFeaturesAnalysisTest.cpp", + ] +} diff --git a/llvm/utils/gn/secondary/llvm/unittests/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/BUILD.gn index 3d960d501e4255..23557304b5715b 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/BUILD.gn @@ -4,6 +4,7 @@ group("unittests") { deps = [ "ADT:ADTTests", "Analysis:AnalysisTests", + "Analysis/ML:MLAnalysisTests", "AsmParser:AsmParserTests", "BinaryFormat:BinaryFormatTests", "Bitcode:BitcodeTests", diff --git a/llvm/utils/gn/secondary/llvm/unittests/Object/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Object/BUILD.gn index 13399818337596..0272e9247f410d 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Object/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Object/BUILD.gn @@ -8,6 +8,8 @@ unittest("ObjectTests") { ] sources = [ "ArchiveTest.cpp", + "ELFObjectFileTest.cpp", + "ELFTest.cpp", "MinidumpTest.cpp", "ObjectFileTest.cpp", "SymbolSizeTest.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/ObjectYAML/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/ObjectYAML/BUILD.gn index 2fbcaa94334d99..7a5855292c4031 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/ObjectYAML/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/ObjectYAML/BUILD.gn @@ -7,6 +7,7 @@ unittest("ObjectYAMLTests") { "//llvm/lib/Testing/Support", ] sources = [ + "ELFYAMLTest.cpp", "MinidumpYAMLTest.cpp", 
"YAML2ObjTest.cpp", "YAMLTest.cpp", diff --git a/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td b/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td index a9759fc6a73431..406aac2db99a27 100644 --- a/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td +++ b/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td @@ -102,7 +102,7 @@ def Shape_ConstShapeOp : Shape_Op<"const_shape", [ConstantLike, NoSideEffect]> { %1 = shape.const_shape [1, 2, 3] ``` }]; - let arguments = (ins I64ElementsAttr:$shape); + let arguments = (ins IndexElementsAttr:$shape); let results = (outs Shape_ShapeType:$result); // TODO: Move this to main so that all shape ops implement these. @@ -206,13 +206,8 @@ def Shape_GetExtentOp : Shape_Op<"get_extent", let builders = [ // Builder that allows passing a simple integer instead of an IntegerAttr. OpBuilder< - [{ - OpBuilder &builder, OperationState &result, - Value shape, int64_t dim - }], - [{ - build(builder, result, shape, builder.getI64IntegerAttr(dim)); - }] + [{OpBuilder &builder, OperationState &result, Value shape, int64_t dim}], + [{build(builder, result, shape, builder.getI64IntegerAttr(dim));}] > ]; diff --git a/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h b/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h index bc59d3de208604..604e90258ca039 100644 --- a/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h +++ b/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h @@ -106,6 +106,12 @@ using Vector3D = Vector; template using Vector4D = Vector; +template +void dropFront(int64_t arr[N], int64_t *res) { + for (unsigned i = 1; i < N; ++i) + *(res + i - 1) = arr[i]; +} + //===----------------------------------------------------------------------===// // Codegen-compatible structures for StridedMemRef type. 
//===----------------------------------------------------------------------===// @@ -123,10 +129,6 @@ struct StridedMemRefType { res.basePtr = basePtr; res.data = data; res.offset = offset + idx * strides[0]; - auto dropFront = [](const int64_t *arr, int64_t *res) { - for (unsigned i = 1; i < N; ++i) - res[i - 1] = arr[i]; - }; dropFront(sizes, res.sizes); dropFront(strides, res.strides); return res; @@ -209,3 +211,4 @@ extern "C" MLIR_CRUNNERUTILS_EXPORT void print_comma(); extern "C" MLIR_CRUNNERUTILS_EXPORT void print_newline(); #endif // EXECUTIONENGINE_CRUNNERUTILS_H_ + diff --git a/mlir/include/mlir/ExecutionEngine/ExecutionEngine.h b/mlir/include/mlir/ExecutionEngine/ExecutionEngine.h index 914cab78dee74a..d0ad8326bac899 100644 --- a/mlir/include/mlir/ExecutionEngine/ExecutionEngine.h +++ b/mlir/include/mlir/ExecutionEngine/ExecutionEngine.h @@ -94,16 +94,9 @@ class ExecutionEngine { /// pointer to it. Propagates errors in case of failure. llvm::Expected lookup(StringRef name) const; - /// Invokes the function with the given name passing it the list of arguments. - /// The arguments are accepted by lvalue-reference since the packed function - /// interface expects a list of non-null pointers. - template - llvm::Error invoke(StringRef name, Args &... args); - /// Invokes the function with the given name passing it the list of arguments - /// as a list of opaque pointers. This is the arity-agnostic equivalent of - /// the templated `invoke`. - llvm::Error invoke(StringRef name, MutableArrayRef args); + /// as a list of opaque pointers. + llvm::Error invoke(StringRef name, MutableArrayRef args = llvm::None); /// Set the target triple on the module. This is implicitly done when creating /// the engine. @@ -135,19 +128,6 @@ class ExecutionEngine { llvm::JITEventListener *perfListener; }; -template -llvm::Error ExecutionEngine::invoke(StringRef name, Args &... 
args) { - auto expectedFPtr = lookup(name); - if (!expectedFPtr) - return expectedFPtr.takeError(); - auto fptr = *expectedFPtr; - - SmallVector packedArgs{static_cast(&args)...}; - (*fptr)(packedArgs.data()); - - return llvm::Error::success(); -} - } // end namespace mlir #endif // MLIR_EXECUTIONENGINE_EXECUTIONENGINE_H_ diff --git a/mlir/include/mlir/IR/Builders.h b/mlir/include/mlir/IR/Builders.h index 4ade6bb1e4390d..0dcf4daf656fd9 100644 --- a/mlir/include/mlir/IR/Builders.h +++ b/mlir/include/mlir/IR/Builders.h @@ -128,6 +128,7 @@ class Builder { /// as attributes. DenseIntElementsAttr getI32TensorAttr(ArrayRef values); DenseIntElementsAttr getI64TensorAttr(ArrayRef values); + DenseIntElementsAttr getIndexTensorAttr(ArrayRef values); ArrayAttr getAffineMapArrayAttr(ArrayRef values); ArrayAttr getBoolArrayAttr(ArrayRef values); @@ -373,6 +374,10 @@ class OpBuilder : public Builder { template OpTy create(Location location, Args &&... args) { OperationState state(location, OpTy::getOperationName()); + if (!state.name.getAbstractOperation()) + llvm::report_fatal_error("Building op `" + + state.name.getStringRef().str() + + "` but it isn't registered in this MLIRContext"); OpTy::build(*this, state, std::forward(args)...); auto *op = createOperation(state); auto result = dyn_cast(op); @@ -389,6 +394,10 @@ class OpBuilder : public Builder { // Create the operation without using 'createOperation' as we don't want to // insert it yet. 
OperationState state(location, OpTy::getOperationName()); + if (!state.name.getAbstractOperation()) + llvm::report_fatal_error("Building op `" + + state.name.getStringRef().str() + + "` but it isn't registered in this MLIRContext"); OpTy::build(*this, state, std::forward(args)...); Operation *op = Operation::create(state); diff --git a/mlir/include/mlir/IR/MLIRContext.h b/mlir/include/mlir/IR/MLIRContext.h index da0b0bd826ceda..8e75bb62444931 100644 --- a/mlir/include/mlir/IR/MLIRContext.h +++ b/mlir/include/mlir/IR/MLIRContext.h @@ -85,6 +85,9 @@ class MLIRContext { /// directly. std::vector getRegisteredOperations(); + /// Return true if this operation name is registered in this context. + bool isOperationRegistered(StringRef name); + // This is effectively private given that only MLIRContext.cpp can see the // MLIRContextImpl type. MLIRContextImpl &getImpl() { return *impl; } diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td index 6a7542c7127c03..5ffb1727ee3533 100644 --- a/mlir/include/mlir/IR/OpBase.td +++ b/mlir/include/mlir/IR/OpBase.td @@ -1218,6 +1218,13 @@ class IntElementsAttrBase : let convertFromStorage = "$_self"; } +def IndexElementsAttr + : IntElementsAttrBase() + .getType() + .getElementType() + .isIndex()}]>, + "index elements attribute">; + class AnyIntElementsAttr : IntElementsAttrBase< CPred<"$_self.cast().getType()." 
"getElementType().isInteger(" # width # ")">, diff --git a/mlir/include/mlir/IR/OpDefinition.h b/mlir/include/mlir/IR/OpDefinition.h index bf5bd70c2b7fe3..e92d54ec84f9bd 100644 --- a/mlir/include/mlir/IR/OpDefinition.h +++ b/mlir/include/mlir/IR/OpDefinition.h @@ -1235,7 +1235,10 @@ class Op : public OpState, static bool classof(Operation *op) { if (auto *abstractOp = op->getAbstractOperation()) return TypeID::get() == abstractOp->typeID; - return op->getName().getStringRef() == ConcreteType::getOperationName(); + assert(op->getContext()->isOperationRegistered( + ConcreteType::getOperationName()) && + "Casting attempt to an unregistered operation"); + return false; } /// This is the hook used by the AsmParser to parse the custom form of this diff --git a/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt index 61b6b61597f9d3..8b3e89768c55d9 100644 --- a/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt @@ -8,6 +8,7 @@ add_mlir_dialect_library(MLIRLinalgUtils MLIRAffineOps MLIREDSC MLIRIR + MLIRLinalgEDSC MLIRLinalgOps MLIRSCF MLIRPass diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp index cd8b17650bb114..c48b87aaa4e449 100644 --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -129,25 +129,29 @@ template struct mlir::linalg::GenerateLoopNest; template struct mlir::linalg::GenerateLoopNest; template struct mlir::linalg::GenerateLoopNest; +namespace mlir { +namespace linalg { /// Specialization of loop nest generator for scf.parallel loops to handle /// iterator types that are not parallel. These are generated as sequential /// loops. 
template <> -void mlir::linalg::GenerateLoopNest::doit( - MutableArrayRef allIvs, ArrayRef loopRanges, - ArrayRef iteratorTypes, std::function fun) { +void GenerateLoopNest::doit(MutableArrayRef allIvs, + ArrayRef loopRanges, + ArrayRef iteratorTypes, + std::function fun) { edsc::GenericLoopNestRangeBuilder(allIvs, loopRanges)(fun); } template <> -void mlir::linalg::GenerateLoopNest::doit( - MutableArrayRef allIvs, ArrayRef loopRanges, - ArrayRef iteratorTypes, std::function fun) { +void GenerateLoopNest::doit(MutableArrayRef allIvs, + ArrayRef loopRanges, + ArrayRef iteratorTypes, + std::function fun) { edsc::GenericLoopNestRangeBuilder(allIvs, loopRanges)(fun); } template <> -void mlir::linalg::GenerateLoopNest::doit( +void GenerateLoopNest::doit( MutableArrayRef allIvs, ArrayRef loopRanges, ArrayRef iteratorTypes, std::function fun) { // Check if there is nothing to do here. This is also the recursion @@ -190,3 +194,5 @@ void mlir::linalg::GenerateLoopNest::doit( allIvs.take_front(nOuterPar), loopRanges.take_front(nOuterPar), iteratorTypes.take_front(nOuterPar), nestedFn); } +} // namespace linalg +} // namespace mlir diff --git a/mlir/lib/Dialect/Shape/IR/Shape.cpp b/mlir/lib/Dialect/Shape/IR/Shape.cpp index fa9552fc869455..c4a8b15298171e 100644 --- a/mlir/lib/Dialect/Shape/IR/Shape.cpp +++ b/mlir/lib/Dialect/Shape/IR/Shape.cpp @@ -177,7 +177,7 @@ OpFoldResult BroadcastOp::fold(ArrayRef operands) { if (!OpTrait::util::getBroadcastedShape(lhsShape, rhsShape, resultShape)) return nullptr; Builder builder(getContext()); - return builder.getI64TensorAttr(resultShape); + return builder.getIndexTensorAttr(resultShape); } //===----------------------------------------------------------------------===// @@ -215,7 +215,7 @@ static ParseResult parseConstShapeOp(OpAsmParser &parser, ints.push_back(attr.getInt()); } Builder &builder = parser.getBuilder(); - result.addAttribute("shape", builder.getI64TensorAttr(ints)); + result.addAttribute("shape", 
builder.getIndexTensorAttr(ints)); result.types.push_back(ShapeType::get(builder.getContext())); return success(); @@ -257,7 +257,7 @@ OpFoldResult FromExtentsOp::fold(ArrayRef operands) { for (auto attr : operands) extents.push_back(attr.cast().getInt()); Builder builder(getContext()); - return builder.getI64TensorAttr(extents); + return builder.getIndexTensorAttr(extents); } //===----------------------------------------------------------------------===// @@ -281,14 +281,7 @@ OpFoldResult GetExtentOp::fold(ArrayRef operands) { // TODO: Constant fold this to some kind of constant error. if (dimToGet >= (uint64_t)elements.getNumElements()) return nullptr; - // This is a little inconvenient because getValue returns an IntegerAttr - // that is not of IndexType, but the result here needs to be of - // IndexType. - // TODO: Make ConstShapeOp hold an tensor of index instead of i64. - Builder builder(getContext()); - return builder.getIntegerAttr( - builder.getIndexType(), - elements.getValue({dimToGet}).getInt()); + return elements.getValue({dimToGet}); } //===----------------------------------------------------------------------===// @@ -309,7 +302,7 @@ OpFoldResult ShapeOfOp::fold(ArrayRef) { if (!type || !type.hasStaticShape()) return nullptr; Builder builder(getContext()); - return builder.getI64TensorAttr(type.getShape()); + return builder.getIndexTensorAttr(type.getShape()); } //===----------------------------------------------------------------------===// @@ -343,8 +336,8 @@ LogicalResult SplitAtOp::fold(ArrayRef operands, if (splitPoint < 0) splitPoint += shape.size(); Builder builder(operands[0].getContext()); - results.push_back(builder.getI64TensorAttr(shape.take_front(splitPoint))); - results.push_back(builder.getI64TensorAttr(shape.drop_front(splitPoint))); + results.push_back(builder.getIndexTensorAttr(shape.take_front(splitPoint))); + results.push_back(builder.getIndexTensorAttr(shape.drop_front(splitPoint))); return success(); } @@ -373,7 +366,7 @@ 
OpFoldResult ConcatOp::fold(ArrayRef operands) { resultShape.append(lhsShape.begin(), lhsShape.end()); resultShape.append(rhsShape.begin(), rhsShape.end()); Builder builder(getContext()); - return builder.getI64TensorAttr(resultShape); + return builder.getIndexTensorAttr(resultShape); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/IR/Attributes.cpp b/mlir/lib/IR/Attributes.cpp index 540c3c6258e29f..12fd08787fa75d 100644 --- a/mlir/lib/IR/Attributes.cpp +++ b/mlir/lib/IR/Attributes.cpp @@ -624,6 +624,8 @@ Attribute DenseElementsAttr::AttributeElementIterator::operator*() const { owner.getContext()); return IntegerAttr::get(eltTy, *IntElementIterator(owner, index)); } + if (eltTy.isa()) + return IntegerAttr::get(eltTy, *IntElementIterator(owner, index)); if (auto floatEltTy = eltTy.dyn_cast()) { IntElementIterator intIt(owner, index); FloatElementIterator floatIt(floatEltTy.getFloatSemantics(), intIt); diff --git a/mlir/lib/IR/Builders.cpp b/mlir/lib/IR/Builders.cpp index a72e03c739e3bb..064889724f0927 100644 --- a/mlir/lib/IR/Builders.cpp +++ b/mlir/lib/IR/Builders.cpp @@ -130,6 +130,13 @@ DenseIntElementsAttr Builder::getI64TensorAttr(ArrayRef values) { values); } +DenseIntElementsAttr Builder::getIndexTensorAttr(ArrayRef values) { + return DenseIntElementsAttr::get( + RankedTensorType::get(static_cast(values.size()), + getIndexType()), + values); +} + IntegerAttr Builder::getI32IntegerAttr(int32_t value) { return IntegerAttr::get(getIntegerType(32), APInt(32, value)); } diff --git a/mlir/lib/IR/MLIRContext.cpp b/mlir/lib/IR/MLIRContext.cpp index 0728f294be861d..da607a2319bfc9 100644 --- a/mlir/lib/IR/MLIRContext.cpp +++ b/mlir/lib/IR/MLIRContext.cpp @@ -543,6 +543,13 @@ std::vector MLIRContext::getRegisteredOperations() { return result; } +bool MLIRContext::isOperationRegistered(StringRef name) { + // Lock access to the context registry. 
+ ScopedReaderLock registryLock(impl->contextMutex, impl->threadingIsEnabled); + + return impl->registeredOperations.count(name); +} + void Dialect::addOperation(AbstractOperation opInfo) { assert((getNamespace().empty() || opInfo.name.split('.').first == getNamespace()) && @@ -621,8 +628,9 @@ Identifier Identifier::get(StringRef str, MLIRContext *context) { static Dialect &lookupDialectForSymbol(MLIRContext *ctx, TypeID typeID) { auto &impl = ctx->getImpl(); auto it = impl.registeredDialectSymbols.find(typeID); - assert(it != impl.registeredDialectSymbols.end() && - "symbol is not registered."); + if (it == impl.registeredDialectSymbols.end()) + llvm::report_fatal_error( + "Trying to create a type that was not registered in this MLIRContext."); return *it->second; } diff --git a/mlir/test/Dialect/Shape/canonicalize.mlir b/mlir/test/Dialect/Shape/canonicalize.mlir index 018f5b212b4e45..23147e557a1516 100644 --- a/mlir/test/Dialect/Shape/canonicalize.mlir +++ b/mlir/test/Dialect/Shape/canonicalize.mlir @@ -15,7 +15,7 @@ func @f() -> (!shape.shape, !shape.shape) { // CHECK: shape.const_shape [2, 3] // CHECK: shape.const_shape [4, 5] %c2 = constant 2 : i32 - %0 = "shape.const_shape"() {shape = dense<[2, 3, 4, 5]> : tensor<4xi64>} : () -> !shape.shape + %0 = shape.const_shape [2, 3, 4, 5] %head, %tail = "shape.split_at"(%0, %c2) : (!shape.shape, i32) -> (!shape.shape, !shape.shape) return %head, %tail : !shape.shape, !shape.shape @@ -28,7 +28,7 @@ func @f() -> (!shape.shape, !shape.shape) { // CHECK: shape.const_shape [2, 3, 4] // CHECK: shape.const_shape [5] %c-1 = constant -1 : i32 - %0 = "shape.const_shape"() {shape = dense<[2, 3, 4, 5]> : tensor<4xi64>} : () -> !shape.shape + %0 = shape.const_shape [2, 3, 4, 5] %head, %tail = "shape.split_at"(%0, %c-1) : (!shape.shape, i32) -> (!shape.shape, !shape.shape) return %head, %tail : !shape.shape, !shape.shape } @@ -39,7 +39,7 @@ func @f() -> (!shape.shape, !shape.shape) { func @f() -> (!shape.shape, !shape.shape) { // 
CHECK: shape.split_at %c5 = constant 5 : i32 - %0 = "shape.const_shape"() {shape = dense<[2, 3, 4, 5]> : tensor<4xi64>} : () -> !shape.shape + %0 = shape.const_shape [2, 3, 4, 5] %head, %tail = "shape.split_at"(%0, %c5) : (!shape.shape, i32) -> (!shape.shape, !shape.shape) return %head, %tail : !shape.shape, !shape.shape } diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index 997d8eb44ae591..8e5b380dff452d 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -454,6 +454,10 @@ def I32ElementsAttrOp : TEST_Op<"i32ElementsAttr"> { let arguments = (ins I32ElementsAttr:$attr); } +def IndexElementsAttrOp : TEST_Op<"indexElementsAttr"> { + let arguments = (ins IndexElementsAttr:$attr); +} + def OpWithInferTypeInterfaceOp : TEST_Op<"op_with_infer_type_if", [ DeclareOpInterfaceMethods]> { let arguments = (ins AnyTensor, AnyTensor); diff --git a/mlir/test/mlir-tblgen/types.mlir b/mlir/test/mlir-tblgen/types.mlir index 6a0a80ca5e5fc7..5e4dac33012b97 100644 --- a/mlir/test/mlir-tblgen/types.mlir +++ b/mlir/test/mlir-tblgen/types.mlir @@ -489,3 +489,18 @@ func @elements_attr_i32(%arg0: tensor<1x2xi32>) { "test.i32ElementsAttr"() {attr = dense<[1, 2]>:tensor<2xi32>} : () -> () return } + +// ----- + +func @elements_attr_index() { + "test.indexElementsAttr"() {attr = dense<[1, 2]>:tensor<2xindex>} : () -> () + return +} + +// ----- + +func @elements_attr_not_index() { + // expected-error@+1 {{index elements attribute}} + "test.indexElementsAttr"() {attr = dense<[1, 2]>:tensor<2xi32>} : () -> () + return +}